refactor(web): unify proxy-guarded fetch path for web tools

This commit is contained in:
Peter Steinberger
2026-02-26 12:44:06 +01:00
parent 8bf1c9a23a
commit b74be2577f
5 changed files with 386 additions and 252 deletions

View File

@@ -1,6 +1,5 @@
import { Type } from "@sinclair/typebox";
import type { OpenClawConfig } from "../../config/config.js";
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
import { SsrFBlockedError } from "../../infra/net/ssrf.js";
import { logDebug } from "../../logger.js";
import { wrapExternalContent, wrapWebContent } from "../../security/external-content.js";
@@ -15,6 +14,7 @@ import {
truncateText,
type ExtractMode,
} from "./web-fetch-utils.js";
import { fetchWithWebToolsNetworkGuard } from "./web-guarded-fetch.js";
import {
CacheEntry,
DEFAULT_CACHE_TTL_MINUTES,
@@ -523,11 +523,10 @@ async function runWebFetch(params: WebFetchRuntimeParams): Promise<Record<string
let release: (() => Promise<void>) | null = null;
let finalUrl = params.url;
try {
const result = await fetchWithSsrFGuard({
const result = await fetchWithWebToolsNetworkGuard({
url: params.url,
maxRedirects: params.maxRedirects,
timeoutMs: params.timeoutSeconds * 1000,
proxy: "env",
timeoutSeconds: params.timeoutSeconds,
init: {
headers: {
Accept: "text/markdown, text/html;q=0.9, */*;q=0.1",

View File

@@ -0,0 +1,50 @@
import {
fetchWithSsrFGuard,
type GuardedFetchOptions,
type GuardedFetchResult,
} from "../../infra/net/fetch-guard.js";
import type { SsrFPolicy } from "../../infra/net/ssrf.js";
/**
 * SSRF policy object with `dangerouslyAllowPrivateNetwork` switched on.
 *
 * Exported so web tools can hand it to the guarded fetch helpers as their
 * `policy` option. NOTE(review): per its name this is meant for endpoints the
 * caller already trusts; confirm before using it for user-supplied URLs.
 */
export const WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY: SsrFPolicy = {
dangerouslyAllowPrivateNetwork: true,
};
/**
 * Options accepted by the web-tools guarded fetch helpers.
 *
 * Same as `GuardedFetchOptions` minus `proxy` (the helper supplies that value
 * itself), plus an optional `timeoutSeconds` that can be given instead of
 * `timeoutMs`.
 */
type WebToolGuardedFetchOptions = Omit<GuardedFetchOptions, "proxy"> & {
// Timeout expressed in seconds; converted to milliseconds by the helper.
timeoutSeconds?: number;
};
/**
 * Works out the effective timeout in milliseconds.
 *
 * A finite `timeoutMs` wins outright. Otherwise a finite `timeoutSeconds` is
 * converted to milliseconds. When neither is a finite number the result is
 * `undefined`, leaving the decision to the underlying fetch guard.
 *
 * @param params - Candidate timeout values; either, both, or none may be set.
 * @returns The timeout in milliseconds, or `undefined` when none is usable.
 */
function resolveTimeoutMs(params: {
  timeoutMs?: number;
  timeoutSeconds?: number;
}): number | undefined {
  const isFiniteNumber = (candidate: unknown): candidate is number =>
    typeof candidate === "number" && Number.isFinite(candidate);

  const { timeoutMs, timeoutSeconds } = params;
  if (isFiniteNumber(timeoutMs)) {
    return timeoutMs;
  }
  return isFiniteNumber(timeoutSeconds) ? timeoutSeconds * 1000 : undefined;
}
/**
 * Runs an SSRF-guarded fetch on behalf of a web tool.
 *
 * Forwards the caller's options to `fetchWithSsrFGuard`, replacing the timeout
 * with the value resolved from `timeoutMs` / `timeoutSeconds` and always
 * setting `proxy: "env"`.
 *
 * @param params - Guarded fetch options (without `proxy`), optionally carrying
 *   `timeoutSeconds` in place of `timeoutMs`.
 * @returns Whatever `fetchWithSsrFGuard` resolves to.
 */
export async function fetchWithWebToolsNetworkGuard(
  params: WebToolGuardedFetchOptions,
): Promise<GuardedFetchResult> {
  const { timeoutSeconds, ...guardOptions } = params;
  const timeoutMs = resolveTimeoutMs({
    timeoutMs: guardOptions.timeoutMs,
    timeoutSeconds,
  });
  // `timeoutMs` and `proxy` come after the spread so they take precedence.
  return fetchWithSsrFGuard({ ...guardOptions, timeoutMs, proxy: "env" });
}
/**
 * Performs a guarded web-tools fetch, hands the outcome to `run`, and releases
 * the guard afterwards whether `run` resolves or rejects.
 *
 * @param params - Options forwarded to `fetchWithWebToolsNetworkGuard`.
 * @param run - Callback that consumes the response and final URL.
 * @returns The value produced by `run`.
 */
export async function withWebToolsNetworkGuard<T>(
  params: WebToolGuardedFetchOptions,
  run: (result: { response: Response; finalUrl: string }) => Promise<T>,
): Promise<T> {
  const {
    response,
    finalUrl,
    release: releaseGuard,
  } = await fetchWithWebToolsNetworkGuard(params);
  try {
    const outcome = await run({ response, finalUrl });
    return outcome;
  } finally {
    // Always release, including when `run` throws.
    await releaseGuard();
  }
}

View File

@@ -33,6 +33,7 @@ describe("web_search redirect resolution hardening", () => {
timeoutMs: 5000,
init: { method: "HEAD" },
policy: { dangerouslyAllowPrivateNetwork: true },
proxy: "env",
}),
);
expect(release).toHaveBeenCalledTimes(1);

View File

@@ -2,11 +2,14 @@ import { Type } from "@sinclair/typebox";
import { formatCliCommand } from "../../cli/command-format.js";
import type { OpenClawConfig } from "../../config/config.js";
import { logVerbose } from "../../globals.js";
import { fetchWithSsrFGuard } from "../../infra/net/fetch-guard.js";
import { wrapWebContent } from "../../security/external-content.js";
import { normalizeSecretInput } from "../../utils/normalize-secret-input.js";
import type { AnyAgentTool } from "./common.js";
import { jsonResult, readNumberParam, readStringParam } from "./common.js";
import {
WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY,
withWebToolsNetworkGuard,
} from "./web-guarded-fetch.js";
import {
CacheEntry,
DEFAULT_CACHE_TTL_MINUTES,
@@ -44,7 +47,6 @@ const BRAVE_FRESHNESS_SHORTCUTS = new Set(["pd", "pw", "pm", "py"]);
const BRAVE_FRESHNESS_RANGE = /^(\d{4}-\d{2}-\d{2})to(\d{4}-\d{2}-\d{2})$/;
const BRAVE_SEARCH_LANG_CODE = /^[a-z]{2}$/i;
const BRAVE_UI_LANG_LOCALE = /^([a-z]{2})-([a-z]{2})$/i;
const TRUSTED_NETWORK_SSRF_POLICY = { dangerouslyAllowPrivateNetwork: true } as const;
const WebSearchSchema = Type.Object({
query: Type.String({ description: "Search query string." }),
@@ -599,19 +601,23 @@ function resolveGeminiModel(gemini?: GeminiConfig): string {
return fromConfig || DEFAULT_GEMINI_MODEL;
}
async function fetchTrustedWebSearchEndpoint(params: {
url: string;
timeoutSeconds: number;
init: RequestInit;
}): Promise<{ response: Response; release: () => Promise<void> }> {
const { response, release } = await fetchWithSsrFGuard({
url: params.url,
init: params.init,
timeoutMs: params.timeoutSeconds * 1000,
policy: TRUSTED_NETWORK_SSRF_POLICY,
proxy: "env",
});
return { response, release };
/**
 * Calls a web-search provider endpoint through the shared web-tools network
 * guard using the trusted-network SSRF policy, and passes only the response
 * on to `run`.
 *
 * @param params - Endpoint URL, timeout in seconds, and request init.
 * @param run - Callback that consumes the provider response.
 * @returns The value produced by `run`.
 */
async function withTrustedWebSearchEndpoint<T>(
  params: {
    url: string;
    timeoutSeconds: number;
    init: RequestInit;
  },
  run: (response: Response) => Promise<T>,
): Promise<T> {
  const guardOptions = {
    url: params.url,
    init: params.init,
    timeoutSeconds: params.timeoutSeconds,
    policy: WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY,
  };
  return withWebToolsNetworkGuard(guardOptions, async (guarded) => run(guarded.response));
}
async function runGeminiSearch(params: {
@@ -622,81 +628,84 @@ async function runGeminiSearch(params: {
}): Promise<{ content: string; citations: Array<{ url: string; title?: string }> }> {
const endpoint = `${GEMINI_API_BASE}/models/${params.model}:generateContent`;
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-goog-api-key": params.apiKey,
},
body: JSON.stringify({
contents: [
{
parts: [{ text: params.query }],
},
],
tools: [{ google_search: {} }],
}),
},
});
try {
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
// Strip API key from any error detail to prevent accidental key leakage in logs
const safeDetail = (detailResult.text || res.statusText).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${res.status}): ${safeDetail}`);
}
let data: GeminiGroundingResponse;
try {
data = (await res.json()) as GeminiGroundingResponse;
} catch (err) {
const safeError = String(err).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err });
}
if (data.error) {
const rawMsg = data.error.message || data.error.status || "unknown";
const safeMsg = rawMsg.replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${data.error.code}): ${safeMsg}`);
}
const candidate = data.candidates?.[0];
const content =
candidate?.content?.parts
?.map((p) => p.text)
.filter(Boolean)
.join("\n") ?? "No response";
const groundingChunks = candidate?.groundingMetadata?.groundingChunks ?? [];
const rawCitations = groundingChunks
.filter((chunk) => chunk.web?.uri)
.map((chunk) => ({
url: chunk.web!.uri!,
title: chunk.web?.title || undefined,
}));
// Resolve Google grounding redirect URLs to direct URLs with concurrency cap.
// Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe.
const MAX_CONCURRENT_REDIRECTS = 10;
const citations: Array<{ url: string; title?: string }> = [];
for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) {
const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS);
const resolved = await Promise.all(
batch.map(async (citation) => {
const resolvedUrl = await resolveRedirectUrl(citation.url);
return { ...citation, url: resolvedUrl };
return withTrustedWebSearchEndpoint(
{
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
"x-goog-api-key": params.apiKey,
},
body: JSON.stringify({
contents: [
{
parts: [{ text: params.query }],
},
],
tools: [{ google_search: {} }],
}),
);
citations.push(...resolved);
}
},
},
async (res) => {
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
// Strip API key from any error detail to prevent accidental key leakage in logs
const safeDetail = (detailResult.text || res.statusText).replace(
/key=[^&\s]+/gi,
"key=***",
);
throw new Error(`Gemini API error (${res.status}): ${safeDetail}`);
}
return { content, citations };
} finally {
await release();
}
let data: GeminiGroundingResponse;
try {
data = (await res.json()) as GeminiGroundingResponse;
} catch (err) {
const safeError = String(err).replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API returned invalid JSON: ${safeError}`, { cause: err });
}
if (data.error) {
const rawMsg = data.error.message || data.error.status || "unknown";
const safeMsg = rawMsg.replace(/key=[^&\s]+/gi, "key=***");
throw new Error(`Gemini API error (${data.error.code}): ${safeMsg}`);
}
const candidate = data.candidates?.[0];
const content =
candidate?.content?.parts
?.map((p) => p.text)
.filter(Boolean)
.join("\n") ?? "No response";
const groundingChunks = candidate?.groundingMetadata?.groundingChunks ?? [];
const rawCitations = groundingChunks
.filter((chunk) => chunk.web?.uri)
.map((chunk) => ({
url: chunk.web!.uri!,
title: chunk.web?.title || undefined,
}));
// Resolve Google grounding redirect URLs to direct URLs with concurrency cap.
// Gemini typically returns 3-8 citations; cap at 10 concurrent to be safe.
const MAX_CONCURRENT_REDIRECTS = 10;
const citations: Array<{ url: string; title?: string }> = [];
for (let i = 0; i < rawCitations.length; i += MAX_CONCURRENT_REDIRECTS) {
const batch = rawCitations.slice(i, i + MAX_CONCURRENT_REDIRECTS);
const resolved = await Promise.all(
batch.map(async (citation) => {
const resolvedUrl = await resolveRedirectUrl(citation.url);
return { ...citation, url: resolvedUrl };
}),
);
citations.push(...resolved);
}
return { content, citations };
},
);
}
const REDIRECT_TIMEOUT_MS = 5000;
@@ -707,18 +716,15 @@ const REDIRECT_TIMEOUT_MS = 5000;
*/
async function resolveRedirectUrl(url: string): Promise<string> {
try {
const { finalUrl, release } = await fetchWithSsrFGuard({
url,
init: { method: "HEAD" },
timeoutMs: REDIRECT_TIMEOUT_MS,
policy: TRUSTED_NETWORK_SSRF_POLICY,
proxy: "env",
});
try {
return finalUrl || url;
} finally {
await release();
}
return await withWebToolsNetworkGuard(
{
url,
init: { method: "HEAD" },
timeoutMs: REDIRECT_TIMEOUT_MS,
policy: WEB_TOOLS_TRUSTED_NETWORK_SSRF_POLICY,
},
async ({ finalUrl }) => finalUrl || url,
);
} catch {
return url;
}
@@ -892,33 +898,33 @@ async function runPerplexitySearch(params: {
body.search_recency_filter = recencyFilter;
}
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
"HTTP-Referer": "https://openclaw.ai",
"X-Title": "OpenClaw Web Search",
return withTrustedWebSearchEndpoint(
{
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
"HTTP-Referer": "https://openclaw.ai",
"X-Title": "OpenClaw Web Search",
},
body: JSON.stringify(body),
},
body: JSON.stringify(body),
},
});
try {
if (!res.ok) {
return await throwWebSearchApiError(res, "Perplexity");
}
async (res) => {
if (!res.ok) {
return await throwWebSearchApiError(res, "Perplexity");
}
const data = (await res.json()) as PerplexitySearchResponse;
const content = data.choices?.[0]?.message?.content ?? "No response";
const citations = data.citations ?? [];
const data = (await res.json()) as PerplexitySearchResponse;
const content = data.choices?.[0]?.message?.content ?? "No response";
const citations = data.citations ?? [];
return { content, citations };
} finally {
await release();
}
return { content, citations };
},
);
}
async function runGrokSearch(params: {
@@ -948,34 +954,34 @@ async function runGrokSearch(params: {
// citations are returned automatically when available — we just parse
// them from the response without requesting them explicitly (#12910).
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: XAI_API_ENDPOINT,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
return withTrustedWebSearchEndpoint(
{
url: XAI_API_ENDPOINT,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
},
body: JSON.stringify(body),
},
body: JSON.stringify(body),
},
});
try {
if (!res.ok) {
return await throwWebSearchApiError(res, "xAI");
}
async (res) => {
if (!res.ok) {
return await throwWebSearchApiError(res, "xAI");
}
const data = (await res.json()) as GrokSearchResponse;
const { text: extractedText, annotationCitations } = extractGrokContent(data);
const content = extractedText ?? "No response";
// Prefer top-level citations; fall back to annotation-derived ones
const citations = (data.citations ?? []).length > 0 ? data.citations! : annotationCitations;
const inlineCitations = data.inline_citations;
const data = (await res.json()) as GrokSearchResponse;
const { text: extractedText, annotationCitations } = extractGrokContent(data);
const content = extractedText ?? "No response";
// Prefer top-level citations; fall back to annotation-derived ones
const citations = (data.citations ?? []).length > 0 ? data.citations! : annotationCitations;
const inlineCitations = data.inline_citations;
return { content, citations, inlineCitations };
} finally {
await release();
}
return { content, citations, inlineCitations };
},
);
}
function extractKimiMessageText(message: KimiMessage | undefined): string | undefined {
@@ -1047,71 +1053,79 @@ async function runKimiSearch(params: {
const MAX_ROUNDS = 3;
for (let round = 0; round < MAX_ROUNDS; round += 1) {
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
const nextResult = await withTrustedWebSearchEndpoint(
{
url: endpoint,
timeoutSeconds: params.timeoutSeconds,
init: {
method: "POST",
headers: {
"Content-Type": "application/json",
Authorization: `Bearer ${params.apiKey}`,
},
body: JSON.stringify({
model: params.model,
messages,
tools: [KIMI_WEB_SEARCH_TOOL],
}),
},
body: JSON.stringify({
model: params.model,
messages,
tools: [KIMI_WEB_SEARCH_TOOL],
}),
},
});
try {
if (!res.ok) {
return await throwWebSearchApiError(res, "Kimi");
}
const data = (await res.json()) as KimiSearchResponse;
for (const citation of extractKimiCitations(data)) {
collectedCitations.add(citation);
}
const choice = data.choices?.[0];
const message = choice?.message;
const text = extractKimiMessageText(message);
const toolCalls = message?.tool_calls ?? [];
if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) {
return { content: text ?? "No response", citations: [...collectedCitations] };
}
messages.push({
role: "assistant",
content: message?.content ?? "",
...(message?.reasoning_content
? {
reasoning_content: message.reasoning_content,
}
: {}),
tool_calls: toolCalls,
});
const toolContent = buildKimiToolResultContent(data);
let pushedToolResult = false;
for (const toolCall of toolCalls) {
const toolCallId = toolCall.id?.trim();
if (!toolCallId) {
continue;
async (
res,
): Promise<{ done: true; content: string; citations: string[] } | { done: false }> => {
if (!res.ok) {
return await throwWebSearchApiError(res, "Kimi");
}
pushedToolResult = true;
messages.push({
role: "tool",
tool_call_id: toolCallId,
content: toolContent,
});
}
if (!pushedToolResult) {
return { content: text ?? "No response", citations: [...collectedCitations] };
}
} finally {
await release();
const data = (await res.json()) as KimiSearchResponse;
for (const citation of extractKimiCitations(data)) {
collectedCitations.add(citation);
}
const choice = data.choices?.[0];
const message = choice?.message;
const text = extractKimiMessageText(message);
const toolCalls = message?.tool_calls ?? [];
if (choice?.finish_reason !== "tool_calls" || toolCalls.length === 0) {
return { done: true, content: text ?? "No response", citations: [...collectedCitations] };
}
messages.push({
role: "assistant",
content: message?.content ?? "",
...(message?.reasoning_content
? {
reasoning_content: message.reasoning_content,
}
: {}),
tool_calls: toolCalls,
});
const toolContent = buildKimiToolResultContent(data);
let pushedToolResult = false;
for (const toolCall of toolCalls) {
const toolCallId = toolCall.id?.trim();
if (!toolCallId) {
continue;
}
pushedToolResult = true;
messages.push({
role: "tool",
tool_call_id: toolCallId,
content: toolContent,
});
}
if (!pushedToolResult) {
return { done: true, content: text ?? "No response", citations: [...collectedCitations] };
}
return { done: false };
},
);
if (nextResult.done) {
return { content: nextResult.content, citations: nextResult.citations };
}
}
@@ -1287,49 +1301,42 @@ async function runWebSearch(params: {
url.searchParams.set("freshness", params.freshness);
}
const { response: res, release } = await fetchTrustedWebSearchEndpoint({
url: url.toString(),
timeoutSeconds: params.timeoutSeconds,
init: {
method: "GET",
headers: {
Accept: "application/json",
"X-Subscription-Token": params.apiKey,
const mapped = await withTrustedWebSearchEndpoint(
{
url: url.toString(),
timeoutSeconds: params.timeoutSeconds,
init: {
method: "GET",
headers: {
Accept: "application/json",
"X-Subscription-Token": params.apiKey,
},
},
},
});
let mapped: Array<{
title: string;
url: string;
description: string;
published?: string;
siteName?: string;
}> = [];
try {
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
const detail = detailResult.text;
throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
}
async (res) => {
if (!res.ok) {
const detailResult = await readResponseText(res, { maxBytes: 64_000 });
const detail = detailResult.text;
throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
}
const data = (await res.json()) as BraveSearchResponse;
const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : [];
mapped = results.map((entry) => {
const description = entry.description ?? "";
const title = entry.title ?? "";
const url = entry.url ?? "";
const rawSiteName = resolveSiteName(url);
return {
title: title ? wrapWebContent(title, "web_search") : "",
url, // Keep raw for tool chaining
description: description ? wrapWebContent(description, "web_search") : "",
published: entry.age || undefined,
siteName: rawSiteName || undefined,
};
});
} finally {
await release();
}
const data = (await res.json()) as BraveSearchResponse;
const results = Array.isArray(data.web?.results) ? (data.web?.results ?? []) : [];
return results.map((entry) => {
const description = entry.description ?? "";
const title = entry.title ?? "";
const url = entry.url ?? "";
const rawSiteName = resolveSiteName(url);
return {
title: title ? wrapWebContent(title, "web_search") : "",
url, // Keep raw for tool chaining
description: description ? wrapWebContent(description, "web_search") : "",
published: entry.age || undefined,
siteName: rawSiteName || undefined,
};
});
},
);
const payload = {
query: params.query,

View File

@@ -46,6 +46,29 @@ function createKimiSearchTool(kimiConfig?: { apiKey?: string; baseUrl?: string;
});
}
/**
 * Test helper: builds a sandboxed web_search tool configured for the given
 * provider, using a fixed placeholder API key for that provider.
 */
function createProviderSearchTool(provider: "brave" | "perplexity" | "grok" | "gemini" | "kimi") {
  const buildSearchConfig = () => {
    switch (provider) {
      case "perplexity":
        return { provider, perplexity: { apiKey: "pplx-config-test" } };
      case "grok":
        return { provider, grok: { apiKey: "xai-config-test" } };
      case "gemini":
        return { provider, gemini: { apiKey: "gemini-config-test" } };
      case "kimi":
        return { provider, kimi: { apiKey: "moonshot-config-test" } };
      default:
        // Brave keeps its key at the top level of the search config.
        return { provider, apiKey: "brave-config-test" };
    }
  };
  const searchConfig = buildSearchConfig();
  return createWebSearchTool({
    config: {
      tools: {
        web: {
          search: searchConfig,
        },
      },
    },
    sandboxed: true,
  });
}
function parseFirstRequestBody(mockFetch: ReturnType<typeof installMockFetch>) {
const request = mockFetch.mock.calls[0]?.[1] as RequestInit | undefined;
const requestBody = request?.body;
@@ -62,6 +85,34 @@ function installPerplexitySuccessFetch() {
});
}
/**
 * Test helper: returns a minimal successful response body shaped for the
 * given provider. Any provider without an explicit case falls through to the
 * chat-completion style payload used for Kimi.
 */
function createProviderSuccessPayload(
  provider: "brave" | "perplexity" | "grok" | "gemini" | "kimi",
) {
  switch (provider) {
    case "brave":
      return { web: { results: [] } };
    case "perplexity":
      return { choices: [{ message: { content: "ok" } }], citations: [] };
    case "grok":
      return { output_text: "ok", citations: [] };
    case "gemini":
      return {
        candidates: [
          {
            content: { parts: [{ text: "ok" }] },
            groundingMetadata: { groundingChunks: [] },
          },
        ],
      };
    default:
      return {
        choices: [{ finish_reason: "stop", message: { role: "assistant", content: "ok" } }],
        search_results: [],
      };
  }
}
async function executePerplexitySearch(
query: string,
options?: {
@@ -159,6 +210,32 @@ describe("web_search country and language parameters", () => {
});
});
// Checks, for every supported provider, that the request issued by the
// web_search tool carries an EnvHttpProxyAgent dispatcher when HTTP_PROXY is set.
describe("web_search provider proxy dispatch", () => {
// Captured once so each test can restore the real fetch afterwards.
const priorFetch = global.fetch;
afterEach(() => {
// Undo the HTTP_PROXY stub and any fetch mock installed by the test.
vi.unstubAllEnvs();
global.fetch = priorFetch;
});
it.each(["brave", "perplexity", "grok", "gemini", "kimi"] as const)(
"uses proxy-aware dispatcher for %s provider when HTTP_PROXY is configured",
async (provider) => {
// The proxy env var is stubbed before the tool is created and executed.
vi.stubEnv("HTTP_PROXY", "http://127.0.0.1:7890");
const mockFetch = installMockFetch(createProviderSuccessPayload(provider));
const tool = createProviderSearchTool(provider);
expect(tool).not.toBeNull();
await tool?.execute?.("call-1", { query: `proxy-${provider}-test` });
// Second argument of the first mocked fetch call is the request init.
const requestInit = mockFetch.mock.calls[0]?.[1] as
| (RequestInit & { dispatcher?: unknown })
| undefined;
expect(requestInit?.dispatcher).toBeInstanceOf(EnvHttpProxyAgent);
},
);
});
describe("web_search perplexity baseUrl defaults", () => {
const priorFetch = global.fetch;