* fix(security): block private/loopback/metadata IPs in link-understanding URL detection

  isAllowedUrl() only blocked 127.0.0.1, leaving localhost, ::1, 0.0.0.0, private RFC 1918 ranges, link-local (169.254.x.x, including cloud metadata), and CGNAT (100.64.0.0/10) reachable for SSRF via link-understanding. Add comprehensive hostname/IP blocking consistent with the SSRF guard already used by media/fetch.ts.

* fix(security): harden link-understanding SSRF host checks

* fix: note link-understanding SSRF hardening in changelog (#15604) (thanks @AI-Reviewer-QS)

---------

Co-authored-by: Yi LIU <yi@quantstamp.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
75 lines
1.9 KiB
TypeScript
75 lines
1.9 KiB
TypeScript
import { isBlockedHostname, isPrivateIpAddress } from "../infra/net/ssrf.js";
|
|
import { DEFAULT_MAX_LINKS } from "./defaults.js";
|
|
|
|
/**
 * Matches a markdown inline link of the form `[label](http(s)://target)`,
 * case-insensitively and globally. The target is matched lazily, so a match
 * ends at the first `)` that follows the start of the URL. Used to blank out
 * markdown link syntax so that only bare URLs are considered.
 */
const MARKDOWN_LINK_RE = /\[[^\]]*]\((https?:\/\/\S+?)\)/gi;

/**
 * Matches a bare `http://` or `https://` URL: the scheme followed by a run of
 * non-whitespace characters. Punctuation that directly follows a URL in prose
 * (for example a sentence-ending `.`) is therefore part of the match.
 */
const BARE_LINK_RE = /https?:\/\/\S+/gi;
|
|
|
|
/**
 * Replace every markdown inline link (`[label](http(s)://…)`) in `message`
 * with a single space, leaving all other text untouched.
 *
 * The whole link, including its target URL, is removed, so URLs that appear
 * only inside markdown link syntax are not seen by later bare-URL scanning.
 *
 * @param message - Raw message text.
 * @returns The message with each markdown link replaced by one space.
 */
function stripMarkdownLinks(message: string): string {
  return message.replace(MARKDOWN_LINK_RE, " ");
}
|
|
|
|
/**
 * Turn an optional caller-supplied link limit into a positive integer.
 *
 * @param value - Requested maximum number of links; may be omitted.
 * @returns `DEFAULT_MAX_LINKS` when `value` is not a finite number greater
 *   than zero; otherwise `value` rounded down, but never less than 1.
 */
function resolveMaxLinks(value?: number): number {
  if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
    return DEFAULT_MAX_LINKS;
  }
  // A fraction in (0, 1) passes the guard above but floors to 0; clamp so the
  // result is always a usable positive limit.
  return Math.max(1, Math.floor(value));
}
|
|
|
|
/**
 * Decide whether a detected URL string may be kept as a link.
 *
 * A URL is allowed only when it parses, uses the `http:` or `https:` scheme,
 * and its hostname is not rejected by `isBlockedHost`.
 *
 * @param raw - Candidate URL text.
 * @returns `true` when the URL is acceptable; `false` otherwise, including
 *   when parsing (or the host check) throws.
 */
function isAllowedUrl(raw: string): boolean {
  // The host check stays inside the `try` on purpose: any failure while
  // validating must reject the URL rather than propagate.
  try {
    const { protocol, hostname } = new URL(raw);
    const isHttp = protocol === "http:" || protocol === "https:";
    return isHttp && !isBlockedHost(hostname);
  } catch {
    return false;
  }
}
|
|
|
|
/**
 * Report whether a URL hostname must be rejected as a link target.
 *
 * The hostname is trimmed, lower-cased, and stripped of one trailing dot, then
 * rejected when it equals `localhost.localdomain` or when either SSRF helper
 * (`isBlockedHostname`, `isPrivateIpAddress`) flags it. Per the original note
 * on this function, the intent is to block loopback, private, link-local, and
 * metadata addresses; the exact ranges are defined by those helpers.
 *
 * @param hostname - Host component of a parsed URL.
 * @returns `true` when the host is blocked.
 */
function isBlockedHost(hostname: string): boolean {
  // `URL#hostname` preserves the root-label dot of a fully-qualified name
  // ("localhost.localdomain."), which would otherwise slip past the literal
  // comparison below. Compare against the dot-less form instead.
  const normalized = hostname.trim().toLowerCase().replace(/\.$/, "");
  return (
    normalized === "localhost.localdomain" ||
    isBlockedHostname(normalized) ||
    isPrivateIpAddress(normalized)
  );
}
|
|
|
|
/**
 * Collect the distinct, allowed bare URLs found in a message.
 *
 * Markdown inline links are blanked out first, so only URLs written as plain
 * text are considered. Each candidate must pass `isAllowedUrl`; duplicates
 * (compared as exact strings) are dropped, and scanning stops once the limit
 * is reached. Results keep the order in which URLs first appear.
 *
 * @param message - Message text to scan. Empty or whitespace-only input
 *   yields an empty array.
 * @param opts - Optional settings; `maxLinks` caps the number of returned
 *   URLs and is normalised by `resolveMaxLinks`.
 * @returns The accepted URLs, in order of first appearance.
 */
export function extractLinksFromMessage(message: string, opts?: { maxLinks?: number }): string[] {
  const source = message?.trim();
  if (!source) {
    return [];
  }

  const maxLinks = resolveMaxLinks(opts?.maxLinks);
  // A Set keeps insertion order, so it serves as both the de-duplication
  // index and the ordered result list.
  const accepted = new Set<string>();

  for (const match of stripMarkdownLinks(source).matchAll(BARE_LINK_RE)) {
    const raw = match[0]?.trim();
    if (!raw || !isAllowedUrl(raw) || accepted.has(raw)) {
      continue;
    }
    accepted.add(raw);
    if (accepted.size >= maxLinks) {
      break;
    }
  }

  return [...accepted];
}
|