diff --git a/src/security/safe-regex.test.ts b/src/security/safe-regex.test.ts index 30fa27936..fe6f10427 100644 --- a/src/security/safe-regex.test.ts +++ b/src/security/safe-regex.test.ts @@ -4,11 +4,15 @@ import { compileSafeRegex, hasNestedRepetition } from "./safe-regex.js"; describe("safe regex", () => { it("flags nested repetition patterns", () => { expect(hasNestedRepetition("(a+)+$")).toBe(true); + expect(hasNestedRepetition("(a|aa)+$")).toBe(true); expect(hasNestedRepetition("^(?:foo|bar)$")).toBe(false); + expect(hasNestedRepetition("^(ab|cd)+$")).toBe(false); }); it("rejects unsafe nested repetition during compile", () => { expect(compileSafeRegex("(a+)+$")).toBeNull(); + expect(compileSafeRegex("(a|aa)+$")).toBeNull(); + expect(compileSafeRegex("(a|aa){2}$")).toBeInstanceOf(RegExp); }); it("compiles common safe filter regex", () => { diff --git a/src/security/safe-regex.ts b/src/security/safe-regex.ts index 4f4f6921a..04c9be61a 100644 --- a/src/security/safe-regex.ts +++ b/src/security/safe-regex.ts @@ -1,22 +1,68 @@ type QuantifierRead = { consumed: number; + minRepeat: number; + maxRepeat: number | null; }; type TokenState = { containsRepetition: boolean; + hasAmbiguousAlternation: boolean; + minLength: number; + maxLength: number; }; type ParseFrame = { lastToken: TokenState | null; containsRepetition: boolean; + hasAlternation: boolean; + branchMinLength: number; + branchMaxLength: number; + altMinLength: number | null; + altMaxLength: number | null; }; const SAFE_REGEX_CACHE_MAX = 256; const safeRegexCache = new Map(); +function createParseFrame(): ParseFrame { + return { + lastToken: null, + containsRepetition: false, + hasAlternation: false, + branchMinLength: 0, + branchMaxLength: 0, + altMinLength: null, + altMaxLength: null, + }; +} + +function addLength(left: number, right: number): number { + if (!Number.isFinite(left) || !Number.isFinite(right)) { + return Number.POSITIVE_INFINITY; + } + return left + right; +} + +function multiplyLength(length: number, factor: number): number { + if (!Number.isFinite(length)) { + return factor === 0 ? 0 : Number.POSITIVE_INFINITY; + } + return length * factor; +} + +function recordAlternative(frame: ParseFrame): void { + if (frame.altMinLength === null || frame.altMaxLength === null) { + frame.altMinLength = frame.branchMinLength; + frame.altMaxLength = frame.branchMaxLength; + return; + } + frame.altMinLength = Math.min(frame.altMinLength, frame.branchMinLength); + frame.altMaxLength = Math.max(frame.altMaxLength, frame.branchMaxLength); +} + export function hasNestedRepetition(source: string): boolean { // Conservative parser: reject patterns where a repeated token/group is repeated again. - const frames: ParseFrame[] = [{ lastToken: null, containsRepetition: false }]; + const frames: ParseFrame[] = [createParseFrame()]; let inCharClass = false; const emitToken = (token: TokenState) => { @@ -25,6 +71,17 @@ export function hasNestedRepetition(source: string): boolean { if (token.containsRepetition) { frame.containsRepetition = true; } + frame.branchMinLength = addLength(frame.branchMinLength, token.minLength); + frame.branchMaxLength = addLength(frame.branchMaxLength, token.maxLength); + }; + + const emitSimpleToken = () => { + emitToken({ + containsRepetition: false, + hasAmbiguousAlternation: false, + minLength: 1, + maxLength: 1, + }); }; for (let i = 0; i < source.length; i += 1) { @@ -32,7 +89,7 @@ export function hasNestedRepetition(source: string): boolean { if (ch === "\\") { i += 1; - emitToken({ containsRepetition: false }); + emitSimpleToken(); continue; } @@ -45,25 +102,47 @@ export function hasNestedRepetition(source: string): boolean { if (ch === "[") { inCharClass = true; - emitToken({ containsRepetition: false }); + emitSimpleToken(); continue; } if (ch === "(") { - frames.push({ lastToken: null, containsRepetition: false }); + frames.push(createParseFrame()); continue; } if (ch === ")") { if (frames.length > 1) { const frame = frames.pop() as ParseFrame; - emitToken({ containsRepetition: frame.containsRepetition }); + if (frame.hasAlternation) { + recordAlternative(frame); + } + const groupMinLength = frame.hasAlternation + ? (frame.altMinLength ?? 0) + : frame.branchMinLength; + const groupMaxLength = frame.hasAlternation + ? (frame.altMaxLength ?? 0) + : frame.branchMaxLength; + emitToken({ + containsRepetition: frame.containsRepetition, + hasAmbiguousAlternation: + frame.hasAlternation && + frame.altMinLength !== null && + frame.altMaxLength !== null && + frame.altMinLength !== frame.altMaxLength, + minLength: groupMinLength, + maxLength: groupMaxLength, + }); } continue; } if (ch === "|") { const frame = frames[frames.length - 1]; + frame.hasAlternation = true; + recordAlternative(frame); + frame.branchMinLength = 0; + frame.branchMaxLength = 0; frame.lastToken = null; continue; } @@ -78,13 +157,32 @@ export function hasNestedRepetition(source: string): boolean { if (token.containsRepetition) { return true; } + if (token.hasAmbiguousAlternation && quantifier.maxRepeat === null) { + return true; + } + + const previousMinLength = token.minLength; + const previousMaxLength = token.maxLength; + token.minLength = multiplyLength(token.minLength, quantifier.minRepeat); + token.maxLength = + quantifier.maxRepeat === null + ? Number.POSITIVE_INFINITY + : multiplyLength(token.maxLength, quantifier.maxRepeat); token.containsRepetition = true; frame.containsRepetition = true; + frame.branchMinLength = frame.branchMinLength - previousMinLength + token.minLength; + + const branchMaxBase = + Number.isFinite(frame.branchMaxLength) && Number.isFinite(previousMaxLength) + ? frame.branchMaxLength - previousMaxLength + : Number.POSITIVE_INFINITY; + frame.branchMaxLength = addLength(branchMaxBase, token.maxLength); + i += quantifier.consumed - 1; continue; } - emitToken({ containsRepetition: false }); + emitSimpleToken(); } return false; @@ -92,12 +190,20 @@ export function hasNestedRepetition(source: string): boolean { function readQuantifier(source: string, index: number): QuantifierRead | null { const ch = source[index]; - if (ch === "*" || ch === "+" || ch === "?") { - return { consumed: source[index + 1] === "?" ? 2 : 1 }; + const consumed = source[index + 1] === "?" ? 2 : 1; + if (ch === "*") { + return { consumed, minRepeat: 0, maxRepeat: null }; + } + if (ch === "+") { + return { consumed, minRepeat: 1, maxRepeat: null }; + } + if (ch === "?") { + return { consumed, minRepeat: 0, maxRepeat: 1 }; } if (ch !== "{") { return null; } + let i = index + 1; while (i < source.length && /\d/.test(source[i])) { i += 1; @@ -105,12 +211,18 @@ function readQuantifier(source: string, index: number): QuantifierRead | null { if (i === index + 1) { return null; } + + const minRepeat = Number.parseInt(source.slice(index + 1, i), 10); + let maxRepeat: number | null = minRepeat; if (source[i] === ",") { i += 1; + const maxStart = i; while (i < source.length && /\d/.test(source[i])) { i += 1; } + maxRepeat = i === maxStart ? null : Number.parseInt(source.slice(maxStart, i), 10); } + if (source[i] !== "}") { return null; } @@ -118,7 +230,11 @@ function readQuantifier(source: string, index: number): QuantifierRead | null { if (source[i] === "?") { i += 1; } - return { consumed: i - index }; + if (maxRepeat !== null && maxRepeat < minRepeat) { + return null; + } + + return { consumed: i - index, minRepeat, maxRepeat }; } export function compileSafeRegex(source: string, flags = ""): RegExp | null {