Files
Moltbot/src/agents/tools/browser-tool.ts
2026-03-05 16:40:52 -08:00

658 lines
21 KiB
TypeScript

import crypto from "node:crypto";
import {
browserAct,
browserArmDialog,
browserArmFileChooser,
browserNavigate,
browserPdfSave,
browserScreenshotAction,
} from "../../browser/client-actions.js";
import {
browserCloseTab,
browserFocusTab,
browserOpenTab,
browserProfiles,
browserStart,
browserStatus,
browserStop,
} from "../../browser/client.js";
import { resolveBrowserConfig } from "../../browser/config.js";
import { DEFAULT_UPLOAD_DIR, resolveExistingPathsWithinRoot } from "../../browser/paths.js";
import { applyBrowserProxyPaths, persistBrowserProxyFiles } from "../../browser/proxy-files.js";
import {
trackSessionBrowserTab,
untrackSessionBrowserTab,
} from "../../browser/session-tab-registry.js";
import { loadConfig } from "../../config/config.js";
import {
executeActAction,
executeConsoleAction,
executeSnapshotAction,
executeTabsAction,
} from "./browser-tool.actions.js";
import { BrowserToolSchema } from "./browser-tool.schema.js";
import { type AnyAgentTool, imageResultFromFile, jsonResult, readStringParam } from "./common.js";
import { callGatewayTool } from "./gateway.js";
import {
listNodes,
resolveNodeIdFromList,
selectDefaultNodeFromList,
type NodeListNode,
} from "./nodes-utils.js";
/**
 * Extracts the optional `targetId` and `timeoutMs` arguments shared by the
 * hook-style actions.
 *
 * @param params - Raw tool arguments.
 * @returns `targetId` trimmed when it is a string (a whitespace-only value
 *   becomes `""`), otherwise `undefined`; `timeoutMs` when it is a finite
 *   number, otherwise `undefined`. Both keys are always present.
 */
function readOptionalTargetAndTimeout(params: Record<string, unknown>) {
  const { targetId: rawTargetId, timeoutMs: rawTimeoutMs } = params;

  let targetId: string | undefined;
  if (typeof rawTargetId === "string") {
    targetId = rawTargetId.trim();
  }

  // NaN and +/-Infinity are rejected so that only usable durations get through.
  let timeoutMs: number | undefined;
  if (typeof rawTimeoutMs === "number" && Number.isFinite(rawTimeoutMs)) {
    timeoutMs = rawTimeoutMs;
  }

  return { targetId, timeoutMs };
}
/**
 * Reads the URL argument used by the `open` and `navigate` actions.
 *
 * `targetUrl` is preferred; when it is absent the `url` argument is read
 * instead as a required parameter, labelled "targetUrl" (presumably so the
 * resulting error names the preferred argument; confirm against
 * `readStringParam`).
 *
 * @param params - Raw tool arguments.
 * @returns The URL string.
 */
function readTargetUrlParam(params: Record<string, unknown>) {
  const preferred = readStringParam(params, "targetUrl");
  if (preferred != null) {
    return preferred;
  }
  return readStringParam(params, "url", { required: true, label: "targetUrl" });
}
/**
 * Top-level argument names that readActRequestParam copies into a synthesized
 * act request when the caller passes `kind` (plus flat fields) instead of a
 * nested `request` object. Only keys actually present on the arguments are
 * copied, in the order listed here.
 */
const LEGACY_BROWSER_ACT_REQUEST_KEYS = [
"targetId",
"ref",
"doubleClick",
"button",
"modifiers",
"text",
"submit",
"slowly",
"key",
"delayMs",
"startRef",
"endRef",
"values",
"fields",
"width",
"height",
"timeMs",
"textGone",
"selector",
"url",
"loadState",
"fn",
"timeoutMs",
] as const;
/**
 * Resolves the act request from the tool arguments.
 *
 * Two shapes are accepted:
 * - a nested `request` object, which is returned as-is (no validation here);
 * - the flat legacy shape, where `kind` and any of
 *   LEGACY_BROWSER_ACT_REQUEST_KEYS sit directly on the arguments.
 *
 * @param params - Raw tool arguments.
 * @returns The act request, or `undefined` when neither `request` nor `kind`
 *   is provided.
 */
function readActRequestParam(params: Record<string, unknown>) {
  const { request: nested } = params;
  if (nested && typeof nested === "object") {
    return nested as Parameters<typeof browserAct>[1];
  }

  const kind = readStringParam(params, "kind");
  if (!kind) {
    return undefined;
  }

  // Copy only the keys the caller actually supplied (own properties), so
  // absent fields are not materialized on the request.
  const suppliedEntries = LEGACY_BROWSER_ACT_REQUEST_KEYS.filter((key) =>
    Object.hasOwn(params, key),
  ).map((key) => [key, params[key]] as const);
  const flattened: Record<string, unknown> = {
    kind,
    ...Object.fromEntries(suppliedEntries),
  };
  return flattened as Parameters<typeof browserAct>[1];
}
/** One entry of the optional `files` list in a browser proxy response. */
type BrowserProxyFile = {
path: string;
base64: string;
mimeType?: string;
};
/**
 * Decoded response of a `browser.proxy` node invocation: the proxied result
 * plus any files that came back with it.
 */
type BrowserProxyResult = {
result: unknown;
files?: BrowserProxyFile[];
};
/** Gateway timeout (ms) used by callBrowserProxy when no finite `timeoutMs` is supplied. */
const DEFAULT_BROWSER_PROXY_TIMEOUT_MS = 20_000;
/**
 * Node selected to serve browser actions. `label` is filled from the node's
 * display name, then its remote IP, then its id.
 */
type BrowserNodeTarget = {
nodeId: string;
label?: string;
};
/**
 * Tells whether a node can serve browser actions.
 *
 * A node qualifies when its `caps` array contains "browser" or its `commands`
 * array contains "browser.proxy". Non-array values are treated as empty.
 *
 * @param node - Entry from the node list.
 * @returns `true` when either marker is present.
 */
function isBrowserNode(node: NodeListNode) {
  const { caps, commands } = node;
  if (Array.isArray(caps) && caps.includes("browser")) {
    return true;
  }
  return Array.isArray(commands) && commands.includes("browser.proxy");
}
/**
 * Decides whether a browser action should be routed to a node-hosted browser
 * proxy, and if so, to which node.
 *
 * Decision order:
 * 1. `gateway.nodes.browser.mode` is "off": never route; throws if a node was
 *    explicitly asked for.
 * 2. A sandbox bridge URL exists and no node was explicitly asked for: no node.
 * 3. `target` is set to something other than "node": no node.
 * 4. Mode is "manual" and no node was explicitly asked for: no node.
 * 5. Otherwise list connected browser-capable nodes and pick the requested
 *    node (argument first, then `gateway.nodes.browser.node`), or the default
 *    selection.
 *
 * "Explicitly asked for" means `target === "node"` or a non-empty
 * `requestedNode`.
 *
 * @param params.requestedNode - Node id or name passed by the caller.
 * @param params.target - Requested browser location.
 * @param params.sandboxBridgeUrl - Sandbox browser bridge URL, if any.
 * @returns The chosen node, or `null` when the action should not go through a node.
 * @throws Error when a node is explicitly requested but proxying is disabled,
 *   no browser-capable node is connected, or `target="node"` has no default
 *   selection.
 */
async function resolveBrowserNodeTarget(params: {
  requestedNode?: string;
  target?: "sandbox" | "host" | "node";
  sandboxBridgeUrl?: string;
}): Promise<BrowserNodeTarget | null> {
  const { requestedNode, target, sandboxBridgeUrl } = params;
  const policy = loadConfig().gateway?.nodes?.browser;
  const mode = policy?.mode ?? "auto";
  const nodeExplicitlyRequested = target === "node" || Boolean(requestedNode);

  if (mode === "off") {
    if (nodeExplicitlyRequested) {
      throw new Error("Node browser proxy is disabled (gateway.nodes.browser.mode=off).");
    }
    return null;
  }
  if (sandboxBridgeUrl?.trim() && !nodeExplicitlyRequested) {
    return null;
  }
  if (target && target !== "node") {
    return null;
  }
  if (mode === "manual" && !nodeExplicitlyRequested) {
    return null;
  }

  const allNodes = await listNodes({});
  const candidates = allNodes.filter((node) => node.connected && isBrowserNode(node));
  if (candidates.length === 0) {
    if (nodeExplicitlyRequested) {
      throw new Error("No connected browser-capable nodes.");
    }
    return null;
  }

  // The caller's node wins over the configured one.
  const requested = requestedNode?.trim() || policy?.node?.trim();
  if (requested) {
    const nodeId = resolveNodeIdFromList(candidates, requested, false);
    const match = candidates.find((entry) => entry.nodeId === nodeId);
    return { nodeId, label: match?.displayName ?? match?.remoteIp ?? nodeId };
  }

  const fallbackNode = selectDefaultNodeFromList(candidates, {
    preferLocalMac: false,
    fallback: "none",
  });
  if (target === "node" && !fallbackNode) {
    throw new Error(
      `Multiple browser-capable nodes connected (${candidates.length}). Set gateway.nodes.browser.node or pass node=<id>.`,
    );
  }
  // Manual mode never auto-routes unless target="node" was given.
  if (target !== "node" && mode === "manual") {
    return null;
  }
  if (!fallbackNode) {
    return null;
  }
  return {
    nodeId: fallbackNode.nodeId,
    label: fallbackNode.displayName ?? fallbackNode.remoteIp ?? fallbackNode.nodeId,
  };
}
/**
 * Sends one browser request to a node through the gateway's `node.invoke`
 * tool using the `browser.proxy` command, and decodes the reply.
 *
 * The gateway call uses `timeoutMs` floored to an integer of at least 1 when
 * it is a finite number, otherwise DEFAULT_BROWSER_PROXY_TIMEOUT_MS. The
 * caller's original `timeoutMs` is forwarded unchanged inside the proxied
 * request. Each call carries a fresh random idempotency key.
 *
 * @param params - Node id plus the method, path, query, body, timeout and
 *   profile of the proxied request.
 * @returns The decoded proxy result, taken from `payload` when present and
 *   otherwise parsed from a non-empty `payloadJSON` string.
 * @throws Error("browser proxy failed") when the reply is missing or is not an
 *   object with a `result` key. Malformed `payloadJSON` propagates the
 *   `JSON.parse` error.
 */
async function callBrowserProxy(params: {
  nodeId: string;
  method: string;
  path: string;
  query?: Record<string, string | number | boolean | undefined>;
  body?: unknown;
  timeoutMs?: number;
  profile?: string;
}): Promise<BrowserProxyResult> {
  const { nodeId, method, path, query, body, timeoutMs, profile } = params;

  let gatewayTimeoutMs: number = DEFAULT_BROWSER_PROXY_TIMEOUT_MS;
  if (typeof timeoutMs === "number" && Number.isFinite(timeoutMs)) {
    gatewayTimeoutMs = Math.max(1, Math.floor(timeoutMs));
  }

  const invocation = {
    nodeId,
    command: "browser.proxy",
    params: { method, path, query, body, timeoutMs, profile },
    idempotencyKey: crypto.randomUUID(),
  };
  const payload = await callGatewayTool<{ payloadJSON?: string; payload?: string }>(
    "node.invoke",
    { timeoutMs: gatewayTimeoutMs },
    invocation,
  );

  // Prefer the already-decoded payload; fall back to the JSON string form.
  let parsed;
  if (payload?.payload != null) {
    parsed = payload.payload;
  } else if (typeof payload?.payloadJSON === "string" && payload.payloadJSON) {
    parsed = JSON.parse(payload.payloadJSON) as BrowserProxyResult;
  } else {
    parsed = null;
  }

  if (!parsed || typeof parsed !== "object" || !("result" in parsed)) {
    throw new Error("browser proxy failed");
  }
  return parsed;
}
/**
 * Hands the files from a proxy response to `persistBrowserProxyFiles`.
 *
 * @param files - Files attached to the proxy response, if any.
 * @returns Whatever `persistBrowserProxyFiles` resolves to; callers in this
 *   file pass it on as the path mapping for `applyProxyPaths`.
 */
async function persistProxyFiles(files: BrowserProxyFile[] | undefined) {
  const persisted = await persistBrowserProxyFiles(files);
  return persisted;
}
/**
 * Applies a path mapping to a proxy result by delegating to
 * `applyBrowserProxyPaths`. Nothing is returned, so any rewrite presumably
 * happens in place on `result` (confirm against `applyBrowserProxyPaths`).
 *
 * @param result - Result object from the proxy response.
 * @param mapping - Path mapping produced when the proxy files were persisted.
 */
function applyProxyPaths(result: unknown, mapping: Map<string, string>): void {
  applyBrowserProxyPaths(result, mapping);
}
/**
 * Resolves the base URL for direct (non-node) browser control.
 *
 * When `target` is omitted it defaults to "sandbox" if a non-blank sandbox
 * bridge URL is available, otherwise "host".
 *
 * @param params.target - Requested browser location.
 * @param params.sandboxBridgeUrl - Sandbox browser bridge URL, if any.
 * @param params.allowHostControl - `false` forbids the host target.
 * @returns The sandbox bridge URL with a single trailing slash removed, or
 *   `undefined` for the host target.
 * @throws Error when the sandbox target is chosen without a bridge URL, when
 *   host control is forbidden, or when browser control is disabled in config.
 */
function resolveBrowserBaseUrl(params: {
  target?: "sandbox" | "host";
  sandboxBridgeUrl?: string;
  allowHostControl?: boolean;
}): string | undefined {
  // Config is loaded up front, before any target-specific checks.
  const cfg = loadConfig();
  const browserConfig = resolveBrowserConfig(cfg.browser, cfg);
  const sandboxUrl = (params.sandboxBridgeUrl ?? "").trim();
  const effectiveTarget = params.target ?? (sandboxUrl.length > 0 ? "sandbox" : "host");

  if (effectiveTarget !== "sandbox") {
    if (params.allowHostControl === false) {
      throw new Error("Host browser control is disabled by sandbox policy.");
    }
    if (!browserConfig.enabled) {
      throw new Error(
        "Browser control is disabled. Set browser.enabled=true in ~/.openclaw/openclaw.json.",
      );
    }
    return undefined;
  }

  if (sandboxUrl.length === 0) {
    throw new Error(
      'Sandbox browser is unavailable. Enable agents.defaults.sandbox.browser.enabled or use target="host" if allowed.',
    );
  }
  return sandboxUrl.replace(/\/$/, "");
}
/**
 * Builds the "browser" agent tool.
 *
 * The tool's `execute` dispatches on the `action` argument. Each action runs
 * through one of two paths:
 * - node proxy: when resolveBrowserNodeTarget yields a node, requests are sent
 *   through callBrowserProxy (`proxyRequest`);
 * - direct: otherwise the browser client functions are called with the base
 *   URL from resolveBrowserBaseUrl.
 *
 * @param opts.sandboxBridgeUrl - Sandbox browser bridge URL. When set, the
 *   description advertises "sandbox" as the default target, and the value is
 *   passed to target resolution.
 * @param opts.allowHostControl - When `false`, the description states that the
 *   host target is blocked; the value is passed to resolveBrowserBaseUrl.
 * @param opts.agentSessionKey - Session key handed to the session tab registry
 *   when tabs are opened or closed on the direct path.
 * @returns The tool definition (label, name, description, parameters, execute).
 */
export function createBrowserTool(opts?: {
sandboxBridgeUrl?: string;
allowHostControl?: boolean;
agentSessionKey?: string;
}): AnyAgentTool {
// These two values only feed the description text below.
const targetDefault = opts?.sandboxBridgeUrl ? "sandbox" : "host";
const hostHint =
opts?.allowHostControl === false ? "Host target blocked by policy." : "Host target allowed.";
return {
label: "Browser",
name: "browser",
description: [
"Control the browser via OpenClaw's browser control server (status/start/stop/profiles/tabs/open/snapshot/screenshot/actions).",
'Profiles: use profile="chrome" for Chrome extension relay takeover (your existing Chrome tabs). Use profile="openclaw" for the isolated openclaw-managed browser.',
'If the user mentions the Chrome extension / Browser Relay / toolbar button / “attach tab”, ALWAYS use profile="chrome" (do not ask which profile).',
'When a node-hosted browser proxy is available, the tool may auto-route to it. Pin a node with node=<id|name> or target="node".',
"Chrome extension relay needs an attached tab: user must click the OpenClaw Browser Relay toolbar icon on the tab (badge ON). If no tab is connected, ask them to attach it.",
"When using refs from snapshot (e.g. e12), keep the same tab: prefer passing targetId from the snapshot response into subsequent actions (act/click/type/etc).",
'For stable, self-resolving refs across calls, use snapshot with refs="aria" (Playwright aria-ref ids). Default refs="role" are role+name-based.',
"Use snapshot+act for UI automation. Avoid act:wait by default; use only in exceptional cases when no reliable UI state exists.",
`target selects browser location (sandbox|host|node). Default: ${targetDefault}.`,
hostHint,
].join(" "),
parameters: BrowserToolSchema,
execute: async (_toolCallId, args) => {
const params = args as Record<string, unknown>;
const action = readStringParam(params, "action", { required: true });
const profile = readStringParam(params, "profile");
const requestedNode = readStringParam(params, "node");
// NOTE(review): the cast is unchecked here; presumably BrowserToolSchema restricts `target` to these values — confirm.
let target = readStringParam(params, "target") as "sandbox" | "host" | "node" | undefined;
// A node argument contradicts an explicit sandbox/host target.
if (requestedNode && target && target !== "node") {
throw new Error('node is only supported with target="node".');
}
if (!target && !requestedNode && profile === "chrome") {
// Chrome extension relay takeover is a host Chrome feature; prefer host unless explicitly targeting a node.
target = "host";
}
const nodeTarget = await resolveBrowserNodeTarget({
requestedNode: requestedNode ?? undefined,
target,
sandboxBridgeUrl: opts?.sandboxBridgeUrl,
});
// resolveBrowserBaseUrl only accepts "sandbox" | "host", so "node" is mapped to undefined.
const resolvedTarget = target === "node" ? undefined : target;
// The base URL is resolved only for the direct path; with a node target it stays undefined.
const baseUrl = nodeTarget
? undefined
: resolveBrowserBaseUrl({
target: resolvedTarget,
sandboxBridgeUrl: opts?.sandboxBridgeUrl,
allowHostControl: opts?.allowHostControl,
});
// Node-proxy request helper (null on the direct path): forwards the request to the
// selected node, persists any returned files, applies the resulting path mapping to
// the result, and returns the result. Note the inner `opts` parameter shadows the
// factory's `opts` inside this arrow function.
const proxyRequest = nodeTarget
? async (opts: {
method: string;
path: string;
query?: Record<string, string | number | boolean | undefined>;
body?: unknown;
timeoutMs?: number;
profile?: string;
}) => {
const proxy = await callBrowserProxy({
nodeId: nodeTarget.nodeId,
method: opts.method,
path: opts.path,
query: opts.query,
body: opts.body,
timeoutMs: opts.timeoutMs,
profile: opts.profile,
});
const mapping = await persistProxyFiles(proxy.files);
applyProxyPaths(proxy.result, mapping);
return proxy.result;
}
: null;
// Every case below handles the node-proxy path first (when proxyRequest is set)
// and falls through to the direct client call otherwise.
switch (action) {
case "status":
if (proxyRequest) {
return jsonResult(
await proxyRequest({
method: "GET",
path: "/",
profile,
}),
);
}
return jsonResult(await browserStatus(baseUrl, { profile }));
// start/stop: perform the operation, then report the status fetched afterwards.
case "start":
if (proxyRequest) {
await proxyRequest({
method: "POST",
path: "/start",
profile,
});
return jsonResult(
await proxyRequest({
method: "GET",
path: "/",
profile,
}),
);
}
await browserStart(baseUrl, { profile });
return jsonResult(await browserStatus(baseUrl, { profile }));
case "stop":
if (proxyRequest) {
await proxyRequest({
method: "POST",
path: "/stop",
profile,
});
return jsonResult(
await proxyRequest({
method: "GET",
path: "/",
profile,
}),
);
}
await browserStop(baseUrl, { profile });
return jsonResult(await browserStatus(baseUrl, { profile }));
// profiles: the proxy result is returned as-is; the direct result is wrapped as { profiles }.
case "profiles":
if (proxyRequest) {
const result = await proxyRequest({
method: "GET",
path: "/profiles",
});
return jsonResult(result);
}
return jsonResult({ profiles: await browserProfiles(baseUrl) });
case "tabs":
return await executeTabsAction({ baseUrl, profile, proxyRequest });
case "open": {
const targetUrl = readTargetUrlParam(params);
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/tabs/open",
profile,
body: { url: targetUrl },
});
return jsonResult(result);
}
const opened = await browserOpenTab(baseUrl, targetUrl, { profile });
// Only tabs opened on the direct path are recorded in the session tab registry.
trackSessionBrowserTab({
sessionKey: opts?.agentSessionKey,
targetId: opened.targetId,
baseUrl,
profile,
});
return jsonResult(opened);
}
case "focus": {
const targetId = readStringParam(params, "targetId", {
required: true,
});
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/tabs/focus",
profile,
body: { targetId },
});
return jsonResult(result);
}
await browserFocusTab(baseUrl, targetId, { profile });
return jsonResult({ ok: true });
}
// close: with a targetId the tab is closed by id; without one an act request
// of kind "close" is sent instead.
case "close": {
const targetId = readStringParam(params, "targetId");
if (proxyRequest) {
const result = targetId
? await proxyRequest({
method: "DELETE",
path: `/tabs/${encodeURIComponent(targetId)}`,
profile,
})
: await proxyRequest({
method: "POST",
path: "/act",
profile,
body: { kind: "close" },
});
return jsonResult(result);
}
if (targetId) {
await browserCloseTab(baseUrl, targetId, { profile });
// Mirror of the tracking done in "open": drop the closed tab from the registry.
untrackSessionBrowserTab({
sessionKey: opts?.agentSessionKey,
targetId,
baseUrl,
profile,
});
} else {
await browserAct(baseUrl, { kind: "close" }, { profile });
}
return jsonResult({ ok: true });
}
case "snapshot":
return await executeSnapshotAction({
input: params,
baseUrl,
profile,
proxyRequest,
});
case "screenshot": {
const targetId = readStringParam(params, "targetId");
const fullPage = Boolean(params.fullPage);
const ref = readStringParam(params, "ref");
const element = readStringParam(params, "element");
// Any value other than "jpeg" falls back to "png".
const type = params.type === "jpeg" ? "jpeg" : "png";
const result = proxyRequest
? ((await proxyRequest({
method: "POST",
path: "/screenshot",
profile,
body: {
targetId,
fullPage,
ref,
element,
type,
},
})) as Awaited<ReturnType<typeof browserScreenshotAction>>)
: await browserScreenshotAction(baseUrl, {
targetId,
fullPage,
ref,
element,
type,
profile,
});
// The image result is built from the file at result.path.
return await imageResultFromFile({
label: "browser:screenshot",
path: result.path,
details: result,
});
}
case "navigate": {
const targetUrl = readTargetUrlParam(params);
const targetId = readStringParam(params, "targetId");
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/navigate",
profile,
body: {
url: targetUrl,
targetId,
},
});
return jsonResult(result);
}
return jsonResult(
await browserNavigate(baseUrl, {
url: targetUrl,
targetId,
profile,
}),
);
}
case "console":
return await executeConsoleAction({
input: params,
baseUrl,
profile,
proxyRequest,
});
case "pdf": {
// NOTE(review): targetId is read inline here rather than via readStringParam as in the other cases.
const targetId = typeof params.targetId === "string" ? params.targetId.trim() : undefined;
const result = proxyRequest
? ((await proxyRequest({
method: "POST",
path: "/pdf",
profile,
body: { targetId },
})) as Awaited<ReturnType<typeof browserPdfSave>>)
: await browserPdfSave(baseUrl, { targetId, profile });
// The saved file is reported as a "FILE:<path>" text item.
return {
content: [{ type: "text" as const, text: `FILE:${result.path}` }],
details: result,
};
}
case "upload": {
const paths = Array.isArray(params.paths) ? params.paths.map((p) => String(p)) : [];
if (paths.length === 0) {
throw new Error("paths required");
}
// Requested paths are resolved against DEFAULT_UPLOAD_DIR; a failed resolution aborts the upload.
const uploadPathsResult = await resolveExistingPathsWithinRoot({
rootDir: DEFAULT_UPLOAD_DIR,
requestedPaths: paths,
scopeLabel: `uploads directory (${DEFAULT_UPLOAD_DIR})`,
});
if (!uploadPathsResult.ok) {
throw new Error(uploadPathsResult.error);
}
const normalizedPaths = uploadPathsResult.paths;
const ref = readStringParam(params, "ref");
const inputRef = readStringParam(params, "inputRef");
const element = readStringParam(params, "element");
const { targetId, timeoutMs } = readOptionalTargetAndTimeout(params);
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/hooks/file-chooser",
profile,
body: {
paths: normalizedPaths,
ref,
inputRef,
element,
targetId,
timeoutMs,
},
});
return jsonResult(result);
}
return jsonResult(
await browserArmFileChooser(baseUrl, {
paths: normalizedPaths,
ref,
inputRef,
element,
targetId,
timeoutMs,
profile,
}),
);
}
case "dialog": {
// `accept` is coerced by truthiness; `promptText` is forwarded only when it is a string.
const accept = Boolean(params.accept);
const promptText = typeof params.promptText === "string" ? params.promptText : undefined;
const { targetId, timeoutMs } = readOptionalTargetAndTimeout(params);
if (proxyRequest) {
const result = await proxyRequest({
method: "POST",
path: "/hooks/dialog",
profile,
body: {
accept,
promptText,
targetId,
timeoutMs,
},
});
return jsonResult(result);
}
return jsonResult(
await browserArmDialog(baseUrl, {
accept,
promptText,
targetId,
timeoutMs,
profile,
}),
);
}
case "act": {
// Accepts either a nested `request` object or the flat legacy fields (see readActRequestParam).
const request = readActRequestParam(params);
if (!request) {
throw new Error("request required");
}
return await executeActAction({
request,
baseUrl,
profile,
proxyRequest,
});
}
default:
throw new Error(`Unknown action: ${action}`);
}
},
};
}