..." cases.
+ const voidTags = new Set([
+ "area",
+ "base",
+ "br",
+ "col",
+ "embed",
+ "hr",
+ "img",
+ "input",
+ "link",
+ "meta",
+ "param",
+ "source",
+ "track",
+ "wbr",
+ ]);
+
+ let depth = 0;
+ const len = html.length;
+ for (let i = 0; i < len; i++) {
+ if (html.charCodeAt(i) !== 60) {
+ continue; // '<'
+ }
+ const next = html.charCodeAt(i + 1);
+ if (next === 33 || next === 63) {
+ continue; // skip "<!...>" and "<?...>" constructs (33 is '!', 63 is '?')
+ }
+
+ let j = i + 1;
+ let closing = false;
+ if (html.charCodeAt(j) === 47) {
+ closing = true;
+ j += 1;
+ }
+
+ while (j < len && html.charCodeAt(j) <= 32) {
+ j += 1;
+ }
+
+ const nameStart = j;
+ while (j < len) {
+ const c = html.charCodeAt(j);
+ const isNameChar =
+ (c >= 65 && c <= 90) || // A-Z
+ (c >= 97 && c <= 122) || // a-z
+ (c >= 48 && c <= 57) || // 0-9
+ c === 58 || // :
+ c === 45; // -
+ if (!isNameChar) {
+ break;
+ }
+ j += 1;
+ }
+
+ const tagName = html.slice(nameStart, j).toLowerCase();
+ if (!tagName) {
+ continue;
+ }
+
+ if (closing) {
+ depth = Math.max(0, depth - 1);
+ continue;
+ }
+
+ if (voidTags.has(tagName)) {
+ continue;
+ }
+
+ // Best-effort self-closing detection: scan a short window for "/>".
+ let selfClosing = false;
+ for (let k = j; k < len && k < j + 200; k++) {
+ const c = html.charCodeAt(k);
+ if (c === 62) {
+ if (html.charCodeAt(k - 1) === 47) {
+ selfClosing = true;
+ }
+ break;
+ }
+ }
+ if (selfClosing) {
+ continue;
+ }
+
+ depth += 1;
+ if (depth > maxDepth) {
+ return true;
+ }
+ }
+ return false;
+}
+
export async function extractReadableContent(params: {
html: string;
url: string;
@@ -120,6 +217,12 @@ export async function extractReadableContent(params: {
}
return rendered;
};
+ if (
+ params.html.length > READABILITY_MAX_HTML_CHARS ||
+ exceedsEstimatedHtmlNestingDepth(params.html, READABILITY_MAX_ESTIMATED_NESTING_DEPTH)
+ ) {
+ return fallback();
+ }
try {
const { Readability, parseHTML } = await loadReadabilityDeps();
const { document } = parseHTML(params.html);
diff --git a/src/agents/tools/web-fetch.response-limit.test.ts b/src/agents/tools/web-fetch.response-limit.test.ts
new file mode 100644
index 000000000..2755fd0b1
--- /dev/null
+++ b/src/agents/tools/web-fetch.response-limit.test.ts
@@ -0,0 +1,66 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
+import * as ssrf from "../../infra/net/ssrf.js";
+import { createWebFetchTool } from "./web-tools.js";
+
+// Avoid dynamic-importing heavy readability deps in this unit test suite.
+vi.mock("./web-fetch-utils.js", async () => { // partial mock: the real module with one export replaced
+ const actual =
+ await vi.importActual
("./web-fetch-utils.js"); // NOTE(review): this call is split across two lines; a type argument on importActual may have been lost in extraction - confirm
+ return {
+ ...actual, // keep every other real export unchanged
+ extractReadableContent: vi.fn().mockResolvedValue({ // stub that always resolves to the fixed title and text below
+ title: "HTML Page",
+ text: "HTML Page\n\nContent here.",
+ }),
+ };
+});
+
+const lookupMock = vi.fn(); // fake lookup function; its resolved value is set in beforeEach
+const resolvePinnedHostname = ssrf.resolvePinnedHostname; // reference to the real function, captured before the suite spies on it
+const baseToolConfig = { // tool options shared by the tests in this file
+ config: {
+ tools: {
+ web: { fetch: { cacheTtlMinutes: 0, firecrawl: { enabled: false }, maxResponseBytes: 1024 } }, // NOTE(review): 1024 is below FETCH_MAX_RESPONSE_BYTES_MIN (32_000), so resolveFetchMaxResponseBytes would presumably raise the effective cap to 32_000 - confirm
+ },
+ },
+} as const;
+
+describe("web_fetch response size limits", () => { // suite: web_fetch must stop reading oversized response bodies
+ const priorFetch = global.fetch; // saved so afterEach can put the previous fetch back
+
+ beforeEach(() => {
+ lookupMock.mockResolvedValue([{ address: "93.184.216.34", family: 4 }]); // every lookup resolves to one fixed IPv4 address
+ vi.spyOn(ssrf, "resolvePinnedHostname").mockImplementation((hostname) => // delegate to the original function, passing lookupMock as its second argument
+ resolvePinnedHostname(hostname, lookupMock),
+ );
+ });
+
+ afterEach(() => {
+ // @ts-expect-error restore
+ global.fetch = priorFetch;
+ lookupMock.mockReset(); // clear the resolved value and call history set up in beforeEach
+ vi.restoreAllMocks(); // undo the spy installed on ssrf
+ });
+
+ it("caps response bytes and does not hang on endless streams", async () => {
+ const chunk = new TextEncoder().encode("hi
");
+ const stream = new ReadableStream({
+ pull(controller) { // enqueues the same chunk on every pull and never closes the stream
+ controller.enqueue(chunk);
+ },
+ });
+ const response = new Response(stream, {
+ status: 200,
+ headers: { "content-type": "text/html; charset=utf-8" },
+ });
+
+ const fetchSpy = vi.fn().mockResolvedValue(response); // fetch always resolves to the endless streaming response
+ // @ts-expect-error mock fetch
+ global.fetch = fetchSpy;
+
+ const tool = createWebFetchTool(baseToolConfig);
+ const result = await tool?.execute?.("call", { url: "https://example.com/stream" });
+
+ expect(result?.details?.warning).toContain("Response body truncated"); // the warning shows the read stopped at the byte cap instead of hanging
+ });
+});
diff --git a/src/agents/tools/web-fetch.ts b/src/agents/tools/web-fetch.ts
index a703aa54f..b92fec9db 100644
--- a/src/agents/tools/web-fetch.ts
+++ b/src/agents/tools/web-fetch.ts
@@ -33,8 +33,12 @@ export { extractReadableContent } from "./web-fetch-utils.js";
const EXTRACT_MODES = ["markdown", "text"] as const;
const DEFAULT_FETCH_MAX_CHARS = 50_000;
+const DEFAULT_FETCH_MAX_RESPONSE_BYTES = 2_000_000; // body byte cap used when config gives no usable maxResponseBytes
+const FETCH_MAX_RESPONSE_BYTES_MIN = 32_000; // lower clamp applied to a configured maxResponseBytes
+const FETCH_MAX_RESPONSE_BYTES_MAX = 10_000_000; // upper clamp applied to a configured maxResponseBytes
const DEFAULT_FETCH_MAX_REDIRECTS = 3;
const DEFAULT_ERROR_MAX_CHARS = 4_000;
+const DEFAULT_ERROR_MAX_BYTES = 64_000; // byte cap passed to readResponseText when reading an error response body
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev";
const DEFAULT_FIRECRAWL_MAX_AGE_MS = 172_800_000;
const DEFAULT_FETCH_USER_AGENT =
@@ -108,6 +112,18 @@ function resolveFetchMaxCharsCap(fetch?: WebFetchConfig): number {
return Math.max(100, Math.floor(raw));
}
+function resolveFetchMaxResponseBytes(fetch?: WebFetchConfig): number { // Resolve the response-body byte cap: configured value clamped to [MIN, MAX], else the default
+ const raw = // the configured value, or undefined when absent or not a number
+ fetch && "maxResponseBytes" in fetch && typeof fetch.maxResponseBytes === "number"
+ ? fetch.maxResponseBytes
+ : undefined;
+ if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) { // missing, NaN, infinite, zero or negative: fall back to the default
+ return DEFAULT_FETCH_MAX_RESPONSE_BYTES;
+ }
+ const value = Math.floor(raw); // drop any fractional part before clamping
+ return Math.min(FETCH_MAX_RESPONSE_BYTES_MAX, Math.max(FETCH_MAX_RESPONSE_BYTES_MIN, value)); // clamp into [MIN, MAX]; values below MIN are raised, not rejected
+}
+
function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig {
if (!fetch || typeof fetch !== "object") {
return undefined;
@@ -413,6 +429,7 @@ async function runWebFetch(params: {
url: string;
extractMode: ExtractMode;
maxChars: number;
+ maxResponseBytes: number;
maxRedirects: number;
timeoutSeconds: number;
cacheTtlMs: number;
@@ -530,7 +547,8 @@ async function runWebFetch(params: {
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
}
- const rawDetail = await readResponseText(res);
+ const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
+ const rawDetail = rawDetailResult.text;
const detail = formatWebFetchErrorDetail({
detail: rawDetail,
contentType: res.headers.get("content-type"),
@@ -542,7 +560,11 @@ async function runWebFetch(params: {
const contentType = res.headers.get("content-type") ?? "application/octet-stream";
const normalizedContentType = normalizeContentType(contentType) ?? "application/octet-stream";
- const body = await readResponseText(res);
+ const bodyResult = await readResponseText(res, { maxBytes: params.maxResponseBytes });
+ const body = bodyResult.text;
+ const responseTruncatedWarning = bodyResult.truncated
+ ? `Response body truncated after ${params.maxResponseBytes} bytes.`
+ : undefined;
let title: string | undefined;
let extractor = "raw";
@@ -593,6 +615,7 @@ async function runWebFetch(params: {
const wrapped = wrapWebFetchContent(text, params.maxChars);
const wrappedTitle = title ? wrapWebFetchField(title) : undefined;
+ const wrappedWarning = wrapWebFetchField(responseTruncatedWarning);
const payload = {
url: params.url, // Keep raw for tool chaining
finalUrl, // Keep raw
@@ -613,6 +636,7 @@ async function runWebFetch(params: {
fetchedAt: new Date().toISOString(),
tookMs: Date.now() - start,
text: wrapped.text,
+ warning: wrappedWarning,
};
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
@@ -695,6 +719,7 @@ export function createWebFetchTool(options?: {
const userAgent =
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
DEFAULT_FETCH_USER_AGENT;
+ const maxResponseBytes = resolveFetchMaxResponseBytes(fetch);
return {
label: "Web Fetch",
name: "web_fetch",
@@ -715,6 +740,7 @@ export function createWebFetchTool(options?: {
DEFAULT_FETCH_MAX_CHARS,
maxCharsCap,
),
+ maxResponseBytes,
maxRedirects: resolveMaxRedirects(fetch?.maxRedirects, DEFAULT_FETCH_MAX_REDIRECTS),
timeoutSeconds: resolveTimeoutSeconds(fetch?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
diff --git a/src/agents/tools/web-search.ts b/src/agents/tools/web-search.ts
index f2e059f43..be174b951 100644
--- a/src/agents/tools/web-search.ts
+++ b/src/agents/tools/web-search.ts
@@ -486,7 +486,8 @@ async function runPerplexitySearch(params: {
});
if (!res.ok) {
- const detail = await readResponseText(res);
+ const detailResult = await readResponseText(res, { maxBytes: 64_000 });
+ const detail = detailResult.text;
throw new Error(`Perplexity API error (${res.status}): ${detail || res.statusText}`);
}
@@ -535,7 +536,8 @@ async function runGrokSearch(params: {
});
if (!res.ok) {
- const detail = await readResponseText(res);
+ const detailResult = await readResponseText(res, { maxBytes: 64_000 });
+ const detail = detailResult.text;
throw new Error(`xAI API error (${res.status}): ${detail || res.statusText}`);
}
@@ -665,7 +667,8 @@ async function runWebSearch(params: {
});
if (!res.ok) {
- const detail = await readResponseText(res);
+ const detailResult = await readResponseText(res, { maxBytes: 64_000 });
+ const detail = detailResult.text;
throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
}
diff --git a/src/agents/tools/web-shared.ts b/src/agents/tools/web-shared.ts
index 2a7353796..da0fbb38b 100644
--- a/src/agents/tools/web-shared.ts
+++ b/src/agents/tools/web-shared.ts
@@ -86,10 +86,85 @@ export function withTimeout(signal: AbortSignal | undefined, timeoutMs: number):
return controller.signal;
}
-export async function readResponseText(res: Response): Promise<string> {
+export type ReadResponseTextResult = { // Outcome of readResponseText
+ text: string; // decoded body text; empty when the fallback read throws
+ truncated: boolean; // true when the streaming read stopped at the maxBytes cap
+ bytesRead: number; // bytes decoded on the streaming path; NOTE(review): the res.text() fallback reports text.length, which counts UTF-16 code units rather than bytes
+};
+
+export async function readResponseText( // Read a response body as text, optionally stopping after options.maxBytes bytes; never throws
+ res: Response,
+ options?: { maxBytes?: number },
+): Promise { // NOTE(review): the Promise type argument looks lost in extraction; every return below is a { text, truncated, bytesRead } object
+ const maxBytesRaw = options?.maxBytes;
+ const maxBytes = // floored cap, or undefined (no cap) when the option is missing, non-finite or not positive
+ typeof maxBytesRaw === "number" && Number.isFinite(maxBytesRaw) && maxBytesRaw > 0
+ ? Math.floor(maxBytesRaw)
+ : undefined;
+
+ const body = (res as unknown as { body?: unknown }).body; // read body as unknown so it can be feature-tested below
+ if ( // streaming path: taken only when a cap is set and the body exposes a getReader function
+ maxBytes && // NOTE(review): a fractional cap below 1 floors to 0, which is falsy here and therefore also means no cap
+ body &&
+ typeof body === "object" &&
+ "getReader" in body &&
+ typeof (body as { getReader: () => unknown }).getReader === "function"
+ ) {
+ const reader = (body as ReadableStream).getReader();
+ const decoder = new TextDecoder(); // no label given, so UTF-8; the response charset is not consulted in this function
+ let bytesRead = 0; // bytes handed to the decoder so far
+ let truncated = false;
+ const parts: string[] = []; // decoded pieces, joined once at the end
+
+ try {
+ while (true) {
+ const { value, done } = await reader.read();
+ if (done) { // stream ended before the cap was reached
+ break;
+ }
+ if (!value || value.byteLength === 0) { // ignore empty chunks
+ continue;
+ }
+
+ let chunk = value;
+ if (bytesRead + chunk.byteLength > maxBytes) { // this chunk would cross the cap
+ const remaining = Math.max(0, maxBytes - bytesRead);
+ if (remaining <= 0) { // NOTE(review): looks unreachable, since the break at the end of the loop keeps bytesRead below the cap here
+ truncated = true;
+ break;
+ }
+ chunk = chunk.subarray(0, remaining); // keep only the bytes that still fit under the cap
+ truncated = true;
+ }
+
+ bytesRead += chunk.byteLength;
+ parts.push(decoder.decode(chunk, { stream: true })); // streaming decode: an incomplete multi-byte sequence is held until the next call
+
+ if (truncated || bytesRead >= maxBytes) { // NOTE(review): a body of exactly maxBytes is reported as truncated, because the loop stops without checking for end of stream
+ truncated = true;
+ break;
+ }
+ }
+ } catch {
+ // Best-effort: return whatever we decoded so far.
+ } finally {
+ if (truncated) { // cancel the rest of the stream only when we stopped early
+ try {
+ await reader.cancel();
+ } catch {
+ // ignore
+ }
+ }
+ }
+
+ parts.push(decoder.decode()); // final flush; bytes of a character cut at the cap presumably surface as U+FFFD - confirm
+ return { text: parts.join(""), truncated, bytesRead };
+ }
+
try {
- return await res.text();
+ const text = await res.text(); // fallback path: no cap or no readable stream, so read the whole body
+ return { text, truncated: false, bytesRead: text.length }; // NOTE(review): text.length counts UTF-16 code units, not bytes
} catch {
- return "";
+ return { text: "", truncated: false, bytesRead: 0 }; // a failed read yields an empty result instead of throwing
}
}