Files
Moltbot/src/agents/tools/web-fetch.ts
2026-02-16 02:15:02 +00:00

774 lines
24 KiB
TypeScript

import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import type { AnyAgentTool } from "./common.js";
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
import { SsrFBlockedError } from "../../infra/net/ssrf.js";
import { logDebug } from "../../logger.js";
import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js";
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
import { stringEnum } from "../schema/typebox.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
import {
extractReadableContent,
htmlToMarkdown,
markdownToText,
truncateText,
type ExtractMode,
} from "./web-fetch-utils.js";
import {
CacheEntry,
DEFAULT_CACHE_TTL_MINUTES,
DEFAULT_TIMEOUT_SECONDS,
normalizeCacheKey,
readCache,
readResponseText,
resolveCacheTtlMs,
resolveTimeoutSeconds,
withTimeout,
writeCache,
} from "./web-shared.js";
export { extractReadableContent } from "./web-fetch-utils.js";
const EXTRACT_MODES = ["markdown", "text"] as const;
const DEFAULT_FETCH_MAX_CHARS = 50_000;
const DEFAULT_FETCH_MAX_RESPONSE_BYTES = 2_000_000;
const FETCH_MAX_RESPONSE_BYTES_MIN = 32_000;
const FETCH_MAX_RESPONSE_BYTES_MAX = 10_000_000;
const DEFAULT_FETCH_MAX_REDIRECTS = 3;
const DEFAULT_ERROR_MAX_CHARS = 4_000;
const DEFAULT_ERROR_MAX_BYTES = 64_000;
const DEFAULT_FIRECRAWL_BASE_URL = "https://api.firecrawl.dev";
const DEFAULT_FIRECRAWL_MAX_AGE_MS = 172_800_000;
const DEFAULT_FETCH_USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
const FETCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
const WebFetchSchema = Type.Object({
url: Type.String({ description: "HTTP or HTTPS URL to fetch." }),
extractMode: Type.Optional(
stringEnum(EXTRACT_MODES, {
description: 'Extraction mode ("markdown" or "text").',
default: "markdown",
}),
),
maxChars: Type.Optional(
Type.Number({
description: "Maximum characters to return (truncates when exceeded).",
minimum: 100,
}),
),
});
type WebFetchConfig = NonNullable<OpenClawConfig["tools"]>["web"] extends infer Web
? Web extends { fetch?: infer Fetch }
? Fetch
: undefined
: undefined;
type FirecrawlFetchConfig =
| {
enabled?: boolean;
apiKey?: string;
baseUrl?: string;
onlyMainContent?: boolean;
maxAgeMs?: number;
timeoutSeconds?: number;
}
| undefined;
function resolveFetchConfig(cfg?: OpenClawConfig): WebFetchConfig {
const fetch = cfg?.tools?.web?.fetch;
if (!fetch || typeof fetch !== "object") {
return undefined;
}
return fetch as WebFetchConfig;
}
function resolveFetchEnabled(params: { fetch?: WebFetchConfig; sandboxed?: boolean }): boolean {
if (typeof params.fetch?.enabled === "boolean") {
return params.fetch.enabled;
}
return true;
}
function resolveFetchReadabilityEnabled(fetch?: WebFetchConfig): boolean {
if (typeof fetch?.readability === "boolean") {
return fetch.readability;
}
return true;
}
function resolveFetchMaxCharsCap(fetch?: WebFetchConfig): number {
const raw =
fetch && "maxCharsCap" in fetch && typeof fetch.maxCharsCap === "number"
? fetch.maxCharsCap
: undefined;
if (typeof raw !== "number" || !Number.isFinite(raw)) {
return DEFAULT_FETCH_MAX_CHARS;
}
return Math.max(100, Math.floor(raw));
}
function resolveFetchMaxResponseBytes(fetch?: WebFetchConfig): number {
const raw =
fetch && "maxResponseBytes" in fetch && typeof fetch.maxResponseBytes === "number"
? fetch.maxResponseBytes
: undefined;
if (typeof raw !== "number" || !Number.isFinite(raw) || raw <= 0) {
return DEFAULT_FETCH_MAX_RESPONSE_BYTES;
}
const value = Math.floor(raw);
return Math.min(FETCH_MAX_RESPONSE_BYTES_MAX, Math.max(FETCH_MAX_RESPONSE_BYTES_MIN, value));
}
function resolveFirecrawlConfig(fetch?: WebFetchConfig): FirecrawlFetchConfig {
if (!fetch || typeof fetch !== "object") {
return undefined;
}
const firecrawl = "firecrawl" in fetch ? fetch.firecrawl : undefined;
if (!firecrawl || typeof firecrawl !== "object") {
return undefined;
}
return firecrawl as FirecrawlFetchConfig;
}
function resolveFirecrawlApiKey(firecrawl?: FirecrawlFetchConfig): string | undefined {
const fromConfig =
firecrawl && "apiKey" in firecrawl && typeof firecrawl.apiKey === "string"
? normalizeSecretInput(firecrawl.apiKey)
: "";
const fromEnv = normalizeSecretInput(process.env.FIRECRAWL_API_KEY);
return fromConfig || fromEnv || undefined;
}
function resolveFirecrawlEnabled(params: {
firecrawl?: FirecrawlFetchConfig;
apiKey?: string;
}): boolean {
if (typeof params.firecrawl?.enabled === "boolean") {
return params.firecrawl.enabled;
}
return Boolean(params.apiKey);
}
function resolveFirecrawlBaseUrl(firecrawl?: FirecrawlFetchConfig): string {
const raw =
firecrawl && "baseUrl" in firecrawl && typeof firecrawl.baseUrl === "string"
? firecrawl.baseUrl.trim()
: "";
return raw || DEFAULT_FIRECRAWL_BASE_URL;
}
function resolveFirecrawlOnlyMainContent(firecrawl?: FirecrawlFetchConfig): boolean {
if (typeof firecrawl?.onlyMainContent === "boolean") {
return firecrawl.onlyMainContent;
}
return true;
}
function resolveFirecrawlMaxAgeMs(firecrawl?: FirecrawlFetchConfig): number | undefined {
const raw =
firecrawl && "maxAgeMs" in firecrawl && typeof firecrawl.maxAgeMs === "number"
? firecrawl.maxAgeMs
: undefined;
if (typeof raw !== "number" || !Number.isFinite(raw)) {
return undefined;
}
const parsed = Math.max(0, Math.floor(raw));
return parsed > 0 ? parsed : undefined;
}
function resolveFirecrawlMaxAgeMsOrDefault(firecrawl?: FirecrawlFetchConfig): number {
const resolved = resolveFirecrawlMaxAgeMs(firecrawl);
if (typeof resolved === "number") {
return resolved;
}
return DEFAULT_FIRECRAWL_MAX_AGE_MS;
}
function resolveMaxChars(value: unknown, fallback: number, cap: number): number {
const parsed = typeof value === "number" && Number.isFinite(value) ? value : fallback;
const clamped = Math.max(100, Math.floor(parsed));
return Math.min(clamped, cap);
}
function resolveMaxRedirects(value: unknown, fallback: number): number {
const parsed = typeof value === "number" && Number.isFinite(value) ? value : fallback;
return Math.max(0, Math.floor(parsed));
}
function looksLikeHtml(value: string): boolean {
const trimmed = value.trimStart();
if (!trimmed) {
return false;
}
const head = trimmed.slice(0, 256).toLowerCase();
return head.startsWith("<!doctype html") || head.startsWith("<html");
}
function formatWebFetchErrorDetail(params: {
detail: string;
contentType?: string | null;
maxChars: number;
}): string {
const { detail, contentType, maxChars } = params;
if (!detail) {
return "";
}
let text = detail;
const contentTypeLower = contentType?.toLowerCase();
if (contentTypeLower?.includes("text/html") || looksLikeHtml(detail)) {
const rendered = htmlToMarkdown(detail);
const withTitle = rendered.title ? `${rendered.title}\n${rendered.text}` : rendered.text;
text = markdownToText(withTitle);
}
const truncated = truncateText(text.trim(), maxChars);
return truncated.text;
}
function redactUrlForDebugLog(rawUrl: string): string {
try {
const parsed = new URL(rawUrl);
return parsed.pathname && parsed.pathname !== "/" ? `${parsed.origin}/...` : parsed.origin;
} catch {
return "[invalid-url]";
}
}
const WEB_FETCH_WRAPPER_WITH_WARNING_OVERHEAD = wrapWebContent("", "web_fetch").length;
const WEB_FETCH_WRAPPER_NO_WARNING_OVERHEAD = wrapExternalContent("", {
source: "web_fetch",
includeWarning: false,
}).length;
function wrapWebFetchContent(
value: string,
maxChars: number,
): {
text: string;
truncated: boolean;
rawLength: number;
wrappedLength: number;
} {
if (maxChars <= 0) {
return { text: "", truncated: true, rawLength: 0, wrappedLength: 0 };
}
const includeWarning = maxChars >= WEB_FETCH_WRAPPER_WITH_WARNING_OVERHEAD;
const wrapperOverhead = includeWarning
? WEB_FETCH_WRAPPER_WITH_WARNING_OVERHEAD
: WEB_FETCH_WRAPPER_NO_WARNING_OVERHEAD;
if (wrapperOverhead > maxChars) {
const minimal = includeWarning
? wrapWebContent("", "web_fetch")
: wrapExternalContent("", { source: "web_fetch", includeWarning: false });
const truncatedWrapper = truncateText(minimal, maxChars);
return {
text: truncatedWrapper.text,
truncated: true,
rawLength: 0,
wrappedLength: truncatedWrapper.text.length,
};
}
const maxInner = Math.max(0, maxChars - wrapperOverhead);
let truncated = truncateText(value, maxInner);
let wrappedText = includeWarning
? wrapWebContent(truncated.text, "web_fetch")
: wrapExternalContent(truncated.text, { source: "web_fetch", includeWarning: false });
if (wrappedText.length > maxChars) {
const excess = wrappedText.length - maxChars;
const adjustedMaxInner = Math.max(0, maxInner - excess);
truncated = truncateText(value, adjustedMaxInner);
wrappedText = includeWarning
? wrapWebContent(truncated.text, "web_fetch")
: wrapExternalContent(truncated.text, { source: "web_fetch", includeWarning: false });
}
return {
text: wrappedText,
truncated: truncated.truncated,
rawLength: truncated.text.length,
wrappedLength: wrappedText.length,
};
}
function wrapWebFetchField(value: string | undefined): string | undefined {
if (!value) {
return value;
}
return wrapExternalContent(value, { source: "web_fetch", includeWarning: false });
}
function buildFirecrawlWebFetchPayload(params: {
firecrawl: Awaited<ReturnType<typeof fetchFirecrawlContent>>;
rawUrl: string;
finalUrlFallback: string;
statusFallback: number;
extractMode: ExtractMode;
maxChars: number;
tookMs: number;
}): Record<string, unknown> {
const wrapped = wrapWebFetchContent(params.firecrawl.text, params.maxChars);
const wrappedTitle = params.firecrawl.title
? wrapWebFetchField(params.firecrawl.title)
: undefined;
return {
url: params.rawUrl, // Keep raw for tool chaining
finalUrl: params.firecrawl.finalUrl || params.finalUrlFallback, // Keep raw
status: params.firecrawl.status ?? params.statusFallback,
contentType: "text/markdown", // Protocol metadata, don't wrap
title: wrappedTitle,
extractMode: params.extractMode,
extractor: "firecrawl",
externalContent: {
untrusted: true,
source: "web_fetch",
wrapped: true,
},
truncated: wrapped.truncated,
length: wrapped.wrappedLength,
rawLength: wrapped.rawLength, // Actual content length, not wrapped
wrappedLength: wrapped.wrappedLength,
fetchedAt: new Date().toISOString(),
tookMs: params.tookMs,
text: wrapped.text,
warning: wrapWebFetchField(params.firecrawl.warning),
};
}
function normalizeContentType(value: string | null | undefined): string | undefined {
if (!value) {
return undefined;
}
const [raw] = value.split(";");
const trimmed = raw?.trim();
return trimmed || undefined;
}
export async function fetchFirecrawlContent(params: {
url: string;
extractMode: ExtractMode;
apiKey: string;
baseUrl: string;
onlyMainContent: boolean;
maxAgeMs: number;
proxy: "auto" | "basic" | "stealth";
storeInCache: boolean;
timeoutSeconds: number;
}): Promise<{
text: string;
title?: string;
finalUrl?: string;
status?: number;
warning?: string;
}> {
const endpoint = resolveFirecrawlEndpoint(params.baseUrl);
const body: Record<string, unknown> = {
url: params.url,
formats: ["markdown"],
onlyMainContent: params.onlyMainContent,
timeout: params.timeoutSeconds * 1000,
maxAge: params.maxAgeMs,
proxy: params.proxy,
storeInCache: params.storeInCache,
};
const res = await fetch(endpoint, {
method: "POST",
headers: {
Authorization: `Bearer ${params.apiKey}`,
"Content-Type": "application/json",
},
body: JSON.stringify(body),
signal: withTimeout(undefined, params.timeoutSeconds * 1000),
});
const payload = (await res.json()) as {
success?: boolean;
data?: {
markdown?: string;
content?: string;
metadata?: {
title?: string;
sourceURL?: string;
statusCode?: number;
};
};
warning?: string;
error?: string;
};
if (!res.ok || payload?.success === false) {
const detail = payload?.error ?? "";
throw new Error(
`Firecrawl fetch failed (${res.status}): ${wrapWebContent(detail || res.statusText, "web_fetch")}`.trim(),
);
}
const data = payload?.data ?? {};
const rawText =
typeof data.markdown === "string"
? data.markdown
: typeof data.content === "string"
? data.content
: "";
const text = params.extractMode === "text" ? markdownToText(rawText) : rawText;
return {
text,
title: data.metadata?.title,
finalUrl: data.metadata?.sourceURL,
status: data.metadata?.statusCode,
warning: payload?.warning,
};
}
type FirecrawlRuntimeParams = {
firecrawlEnabled: boolean;
firecrawlApiKey?: string;
firecrawlBaseUrl: string;
firecrawlOnlyMainContent: boolean;
firecrawlMaxAgeMs: number;
firecrawlProxy: "auto" | "basic" | "stealth";
firecrawlStoreInCache: boolean;
firecrawlTimeoutSeconds: number;
};
type WebFetchRuntimeParams = FirecrawlRuntimeParams & {
url: string;
extractMode: ExtractMode;
maxChars: number;
maxResponseBytes: number;
maxRedirects: number;
timeoutSeconds: number;
cacheTtlMs: number;
userAgent: string;
readabilityEnabled: boolean;
};
function toFirecrawlContentParams(
params: FirecrawlRuntimeParams & { url: string; extractMode: ExtractMode },
): Parameters<typeof fetchFirecrawlContent>[0] | null {
if (!params.firecrawlEnabled || !params.firecrawlApiKey) {
return null;
}
return {
url: params.url,
extractMode: params.extractMode,
apiKey: params.firecrawlApiKey,
baseUrl: params.firecrawlBaseUrl,
onlyMainContent: params.firecrawlOnlyMainContent,
maxAgeMs: params.firecrawlMaxAgeMs,
proxy: params.firecrawlProxy,
storeInCache: params.firecrawlStoreInCache,
timeoutSeconds: params.firecrawlTimeoutSeconds,
};
}
async function maybeFetchFirecrawlWebFetchPayload(
params: WebFetchRuntimeParams & {
urlToFetch: string;
finalUrlFallback: string;
statusFallback: number;
cacheKey: string;
tookMs: number;
},
): Promise<Record<string, unknown> | null> {
const firecrawlParams = toFirecrawlContentParams({
...params,
url: params.urlToFetch,
extractMode: params.extractMode,
});
if (!firecrawlParams) {
return null;
}
const firecrawl = await fetchFirecrawlContent(firecrawlParams);
const payload = buildFirecrawlWebFetchPayload({
firecrawl,
rawUrl: params.url,
finalUrlFallback: params.finalUrlFallback,
statusFallback: params.statusFallback,
extractMode: params.extractMode,
maxChars: params.maxChars,
tookMs: params.tookMs,
});
writeCache(FETCH_CACHE, params.cacheKey, payload, params.cacheTtlMs);
return payload;
}
async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string, unknown>> {
const cacheKey = normalizeCacheKey(
`fetch:${params.url}:${params.extractMode}:${params.maxChars}`,
);
const cached = readCache(FETCH_CACHE, cacheKey);
if (cached) {
return { ...cached.value, cached: true };
}
let parsedUrl: URL;
try {
parsedUrl = new URL(params.url);
} catch {
throw new Error("Invalid URL: must be http or https");
}
if (!["http:", "https:"].includes(parsedUrl.protocol)) {
throw new Error("Invalid URL: must be http or https");
}
const start = Date.now();
let res: Response;
let release: (() => Promise<void>) | null = null;
let finalUrl = params.url;
try {
const result = await fetchWithSsrFGuard({
url: params.url,
maxRedirects: params.maxRedirects,
timeoutMs: params.timeoutSeconds * 1000,
init: {
headers: {
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",
"User-Agent": params.userAgent,
"Accept-Language": "en-US,en;q=0.9",
},
},
});
res = result.response;
finalUrl = result.finalUrl;
release = result.release;
// Cloudflare Markdown for Agents — log token budget hint when present
const markdownTokens = res.headers.get("x-markdown-tokens");
if (markdownTokens) {
logDebug(
`[web-fetch] x-markdown-tokens: ${markdownTokens} (${redactUrlForDebugLog(finalUrl)})`,
);
}
} catch (error) {
if (error instanceof SsrFBlockedError) {
throw error;
}
const payload = await maybeFetchFirecrawlWebFetchPayload({
...params,
urlToFetch: finalUrl,
finalUrlFallback: finalUrl,
statusFallback: 200,
cacheKey,
tookMs: Date.now() - start,
});
if (payload) {
return payload;
}
throw error;
}
try {
if (!res.ok) {
const payload = await maybeFetchFirecrawlWebFetchPayload({
...params,
urlToFetch: params.url,
finalUrlFallback: finalUrl,
statusFallback: res.status,
cacheKey,
tookMs: Date.now() - start,
});
if (payload) {
return payload;
}
const rawDetailResult = await readResponseText(res, { maxBytes: DEFAULT_ERROR_MAX_BYTES });
const rawDetail = rawDetailResult.text;
const detail = formatWebFetchErrorDetail({
detail: rawDetail,
contentType: res.headers.get("content-type"),
maxChars: DEFAULT_ERROR_MAX_CHARS,
});
const wrappedDetail = wrapWebFetchContent(detail || res.statusText, DEFAULT_ERROR_MAX_CHARS);
throw new Error(`Web fetch failed (${res.status}): ${wrappedDetail.text}`);
}
const contentType = res.headers.get("content-type") ?? "application/octet-stream";
const normalizedContentType = normalizeContentType(contentType) ?? "application/octet-stream";
const bodyResult = await readResponseText(res, { maxBytes: params.maxResponseBytes });
const body = bodyResult.text;
const responseTruncatedWarning = bodyResult.truncated
? `Response body truncated after ${params.maxResponseBytes} bytes.`
: undefined;
let title: string | undefined;
let extractor = "raw";
let text = body;
if (contentType.includes("text/markdown")) {
// Cloudflare Markdown for Agents: server returned pre-rendered markdown
extractor = "cf-markdown";
if (params.extractMode === "text") {
text = markdownToText(body);
}
} else if (contentType.includes("text/html")) {
if (params.readabilityEnabled) {
const readable = await extractReadableContent({
html: body,
url: finalUrl,
extractMode: params.extractMode,
});
if (readable?.text) {
text = readable.text;
title = readable.title;
extractor = "readability";
} else {
const firecrawl = await tryFirecrawlFallback({ ...params, url: finalUrl });
if (firecrawl) {
text = firecrawl.text;
title = firecrawl.title;
extractor = "firecrawl";
} else {
throw new Error(
"Web fetch extraction failed: Readability and Firecrawl returned no content.",
);
}
}
} else {
throw new Error(
"Web fetch extraction failed: Readability disabled and Firecrawl unavailable.",
);
}
} else if (contentType.includes("application/json")) {
try {
text = JSON.stringify(JSON.parse(body), null, 2);
extractor = "json";
} catch {
text = body;
extractor = "raw";
}
}
const wrapped = wrapWebFetchContent(text, params.maxChars);
const wrappedTitle = title ? wrapWebFetchField(title) : undefined;
const wrappedWarning = wrapWebFetchField(responseTruncatedWarning);
const payload = {
url: params.url, // Keep raw for tool chaining
finalUrl, // Keep raw
status: res.status,
contentType: normalizedContentType, // Protocol metadata, don't wrap
title: wrappedTitle,
extractMode: params.extractMode,
extractor,
externalContent: {
untrusted: true,
source: "web_fetch",
wrapped: true,
},
truncated: wrapped.truncated,
length: wrapped.wrappedLength,
rawLength: wrapped.rawLength, // Actual content length, not wrapped
wrappedLength: wrapped.wrappedLength,
fetchedAt: new Date().toISOString(),
tookMs: Date.now() - start,
text: wrapped.text,
warning: wrappedWarning,
};
writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
return payload;
} finally {
if (release) {
await release();
}
}
}
async function tryFirecrawlFallback(
params: FirecrawlRuntimeParams & { url: string; extractMode: ExtractMode },
): Promise<{ text: string; title?: string } | null> {
const firecrawlParams = toFirecrawlContentParams(params);
if (!firecrawlParams) {
return null;
}
try {
const firecrawl = await fetchFirecrawlContent(firecrawlParams);
return { text: firecrawl.text, title: firecrawl.title };
} catch {
return null;
}
}
function resolveFirecrawlEndpoint(baseUrl: string): string {
const trimmed = baseUrl.trim();
if (!trimmed) {
return `${DEFAULT_FIRECRAWL_BASE_URL}/v2/scrape`;
}
try {
const url = new URL(trimmed);
if (url.pathname && url.pathname !== "/") {
return url.toString();
}
url.pathname = "/v2/scrape";
return url.toString();
} catch {
return `${DEFAULT_FIRECRAWL_BASE_URL}/v2/scrape`;
}
}
export function createWebFetchTool(options?: {
config?: OpenClawConfig;
sandboxed?: boolean;
}): AnyAgentTool | null {
const fetch = resolveFetchConfig(options?.config);
if (!resolveFetchEnabled({ fetch, sandboxed: options?.sandboxed })) {
return null;
}
const readabilityEnabled = resolveFetchReadabilityEnabled(fetch);
const firecrawl = resolveFirecrawlConfig(fetch);
const firecrawlApiKey = resolveFirecrawlApiKey(firecrawl);
const firecrawlEnabled = resolveFirecrawlEnabled({ firecrawl, apiKey: firecrawlApiKey });
const firecrawlBaseUrl = resolveFirecrawlBaseUrl(firecrawl);
const firecrawlOnlyMainContent = resolveFirecrawlOnlyMainContent(firecrawl);
const firecrawlMaxAgeMs = resolveFirecrawlMaxAgeMsOrDefault(firecrawl);
const firecrawlTimeoutSeconds = resolveTimeoutSeconds(
firecrawl?.timeoutSeconds ?? fetch?.timeoutSeconds,
DEFAULT_TIMEOUT_SECONDS,
);
const userAgent =
(fetch && "userAgent" in fetch && typeof fetch.userAgent === "string" && fetch.userAgent) ||
DEFAULT_FETCH_USER_AGENT;
const maxResponseBytes = resolveFetchMaxResponseBytes(fetch);
return {
label: "Web Fetch",
name: "web_fetch",
description:
"Fetch and extract readable content from a URL (HTML → markdown/text). Use for lightweight page access without browser automation.",
parameters: WebFetchSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const url = readStringParam(params, "url", { required: true });
const extractMode = readStringParam(params, "extractMode") === "text" ? "text" : "markdown";
const maxChars = readNumberParam(params, "maxChars", { integer: true });
const maxCharsCap = resolveFetchMaxCharsCap(fetch);
const result = await runWebFetch({
url,
extractMode,
maxChars: resolveMaxChars(
maxChars ?? fetch?.maxChars,
DEFAULT_FETCH_MAX_CHARS,
maxCharsCap,
),
maxResponseBytes,
maxRedirects: resolveMaxRedirects(fetch?.maxRedirects, DEFAULT_FETCH_MAX_REDIRECTS),
timeoutSeconds: resolveTimeoutSeconds(fetch?.timeoutSeconds, DEFAULT_TIMEOUT_SECONDS),
cacheTtlMs: resolveCacheTtlMs(fetch?.cacheTtlMinutes, DEFAULT_CACHE_TTL_MINUTES),
userAgent,
readabilityEnabled,
firecrawlEnabled,
firecrawlApiKey,
firecrawlBaseUrl,
firecrawlOnlyMainContent,
firecrawlMaxAgeMs,
firecrawlProxy: "auto",
firecrawlStoreInCache: true,
firecrawlTimeoutSeconds,
});
return jsonResult(result);
},
};
}