* feat(browser): add batch actions, CSS selector support, and click delayMs Adds three improvements to the browser act tool: 1. CSS selector support: All element-targeting actions (click, type, hover, drag, scrollIntoView, select) now accept an optional 'selector' parameter alongside 'ref'. When selector is provided, Playwright's page.locator() is used directly, skipping the need for a snapshot to obtain refs. This reduces roundtrips for agents that already know the DOM structure. 2. Click delay (delayMs): The click action now accepts an optional 'delayMs' parameter. When set, the element is hovered first, then after the specified delay, clicked. This enables human-like hover-before-click in a single tool call instead of three (hover + wait + click). 3. Batch actions: New 'batch' action kind that accepts an array of actions to execute sequentially in a single tool call. Supports 'stopOnError' (default true) to control whether execution halts on first failure. Results are returned as an array. This eliminates the AI inference roundtrip between each action, dramatically reducing latency and token cost for multi-step flows. Addresses: #44431, #38844 * fix(browser): address security review — batch evaluateEnabled guard, input validation, recursion limit Fixes all 4 issues raised by Greptile review: 1. Security: batch actions now respect evaluateEnabled flag. executeSingleAction and batchViaPlaywright accept evaluateEnabled param. evaluate and wait-with-fn inside batches are rejected when evaluateEnabled=false, matching the direct route guards. 2. Security: batch input validation. Each action in body.actions is validated as a plain object with a known kind string before dispatch. Applies same normalization as direct action handlers. 3. Perf: SELECTOR_ALLOWED_KINDS moved to module scope as a ReadonlySet<string> constant (was re-created on every request). 4. Security: max batch nesting depth of 5. 
Nested batch actions track depth and throw if MAX_BATCH_DEPTH exceeded, preventing call stack exhaustion from crafted payloads. * fix(browser): normalize batch act dispatch * fix(browser): tighten existing-session act typing * fix(browser): preserve batch type text * fix(browser): complete batch action execution * test(browser): cover batch route normalization * test(browser): cover batch interaction dispatch * fix(browser): bound batch route action inputs * fix(browser): harden batch interaction limits * test(browser): cover batch security guardrails --------- Co-authored-by: Diwakar <diwakarrankawat@gmail.com>
86 lines
2.5 KiB
TypeScript
86 lines
2.5 KiB
TypeScript
import { parseRoleRef } from "./pw-role-snapshot.js";
|
|
|
|
// Module-level counters, one per category (upload, dialog, download).
// Each starts at 0 and is only advanced by its matching bump function below,
// so the first value handed out for a category is 1.
let nextUploadArmId = 0;
let nextDialogArmId = 0;
let nextDownloadArmId = 0;

/**
 * Advances the upload counter by one.
 *
 * @returns The counter value after the increment.
 */
export function bumpUploadArmId(): number {
  return (nextUploadArmId += 1);
}

/**
 * Advances the dialog counter by one.
 *
 * @returns The counter value after the increment.
 */
export function bumpDialogArmId(): number {
  return (nextDialogArmId += 1);
}

/**
 * Advances the download counter by one.
 *
 * @returns The counter value after the increment.
 */
export function bumpDownloadArmId(): number {
  return (nextDownloadArmId += 1);
}
|
|
|
|
/**
 * Extracts a non-empty ref string from an untyped value.
 *
 * Non-string input is treated as empty. A string is trimmed, then handed to
 * `parseRoleRef`; when that returns a non-nullish value it is used as the ref.
 * Otherwise the trimmed text is used, minus a single leading "@" if present.
 *
 * @param value - Candidate ref of unknown type.
 * @returns The resolved ref.
 * @throws Error("ref is required") when no non-empty ref can be derived.
 */
export function requireRef(value: unknown): string {
  const text = typeof value === "string" ? value.trim() : "";
  if (text === "") {
    // Nothing to parse: missing, non-string, or whitespace-only input.
    throw new Error("ref is required");
  }

  const parsed = parseRoleRef(text);
  const resolved = parsed ?? (text.startsWith("@") ? text.slice(1) : text);
  if (!resolved) {
    // Reached when stripping the "@" prefix leaves nothing (e.g. input "@").
    throw new Error("ref is required");
  }
  return resolved;
}
|
|
|
|
/**
 * Validates that at least one of `ref` / `selector` carries non-blank text.
 *
 * Both inputs are trimmed. A value that is blank after trimming (or is not a
 * string at runtime) is reported back as `undefined`.
 *
 * @param ref - Optional element ref.
 * @param selector - Optional selector string.
 * @returns An object with the trimmed `ref` and `selector`; either may be
 *   `undefined`, but never both.
 * @throws Error("ref or selector is required") when both are blank or missing.
 */
export function requireRefOrSelector(
  ref: string | undefined,
  selector: string | undefined,
): { ref?: string; selector?: string } {
  // The typeof guard is kept even though the signature says string | undefined,
  // so a non-string passed at runtime is treated as blank instead of throwing.
  const clean = (input: string | undefined): string =>
    typeof input === "string" ? input.trim() : "";

  const cleanRef = clean(ref);
  const cleanSelector = clean(selector);

  if (cleanRef === "" && cleanSelector === "") {
    throw new Error("ref or selector is required");
  }

  return {
    ref: cleanRef === "" ? undefined : cleanRef,
    selector: cleanSelector === "" ? undefined : cleanSelector,
  };
}
|
|
|
|
/**
 * Resolves a caller-supplied timeout to a value within [500, 120000] ms.
 *
 * `timeoutMs` is used when it is a real number; `undefined`, `null`, and `NaN`
 * all select `fallback` instead. `NaN` needs the explicit check because `??`
 * does not catch it and `Math.min` / `Math.max` would propagate it, yielding a
 * `NaN` timeout. Infinite values clamp to the nearest bound.
 *
 * @param timeoutMs - Requested timeout in milliseconds, if any.
 * @param fallback - Timeout used when `timeoutMs` is missing or `NaN`;
 *   expected to be a real number (a `NaN` fallback is not corrected).
 * @returns The chosen timeout clamped to the 500..120000 range.
 */
export function normalizeTimeoutMs(timeoutMs: number | undefined, fallback: number): number {
  const minTimeoutMs = 500;
  const maxTimeoutMs = 120_000;

  const requested = timeoutMs == null || Number.isNaN(timeoutMs) ? fallback : timeoutMs;
  return Math.max(minTimeoutMs, Math.min(maxTimeoutMs, requested));
}
|
|
|
|
/**
 * Rewrites selected low-level interaction errors into short, actionable text.
 *
 * The error message (or `String(error)` for non-Error values) is scanned for
 * known phrases, checked in this order:
 *  1. "strict mode violation": reports how many elements matched, using the
 *     number from "resolved to N elements" when present, else "multiple".
 *  2. A timeout/wait phrase combined with a visibility phrase: reports the
 *     element as not found or not visible.
 *  3. A pointer-interception or "not visible" phrase: reports the element as
 *     not interactable.
 * Anything else is passed through unchanged (wrapped in an Error if needed).
 *
 * @param error - The thrown value to translate.
 * @param selector - Identifier of the targeted element, quoted in the output.
 * @returns A new Error with the friendly message, or the original error.
 */
export function toAIFriendlyError(error: unknown, selector: string): Error {
  const text = error instanceof Error ? error.message : String(error);
  const mentions = (fragment: string): boolean => text.includes(fragment);

  if (mentions("strict mode violation")) {
    const found = /resolved to (\d+) elements/.exec(text);
    const count = found === null ? "multiple" : found[1];
    return new Error(
      `Selector "${selector}" matched ${count} elements. ` +
        `Run a new snapshot to get updated refs, or use a different ref.`,
    );
  }

  const timedOutOrWaiting = mentions("Timeout") || mentions("waiting for");
  const visibilityComplaint = mentions("to be visible") || mentions("not visible");
  if (timedOutOrWaiting && visibilityComplaint) {
    return new Error(
      `Element "${selector}" not found or not visible. ` +
        `Run a new snapshot to see current page elements.`,
    );
  }

  // "not visible" without a timeout/wait phrase lands here rather than above.
  const blockedPhrases = [
    "intercepts pointer events",
    "not visible",
    "not receive pointer events",
  ];
  if (blockedPhrases.some((phrase) => mentions(phrase))) {
    return new Error(
      `Element "${selector}" is not interactable (hidden or covered). ` +
        `Try scrolling it into view, closing overlays, or re-snapshotting.`,
    );
  }

  if (error instanceof Error) {
    return error;
  }
  return new Error(text);
}
|