From 7b5e64ef2e369258e2a4a613b7a62db3c21e5160 Mon Sep 17 00:00:00 2001 From: Ayaan Zaidi Date: Wed, 4 Mar 2026 17:17:24 +0530 Subject: [PATCH] fix: preserve raw media invoke for HTTP tool clients (#34365) --- CHANGELOG.md | 1 + src/agents/openclaw-tools.camera.test.ts | 50 +++++++++++++++++++++--- src/agents/openclaw-tools.ts | 3 ++ src/agents/tools/nodes-tool.ts | 3 +- src/gateway/tools-invoke-http.test.ts | 1 + src/gateway/tools-invoke-http.ts | 2 + 6 files changed, 54 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f03662be1..fb53bd780 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Gateway/HTTP tools invoke media compatibility: preserve raw media payload access for direct `/tools/invoke` clients by allowing media `nodes` invoke commands only in HTTP tool context, while keeping agent-context media invoke blocking to prevent base64 prompt bloat. (#34365) Thanks @obviyus. - Agents/Nodes media outputs: add dedicated `photos_latest` action handling, block media-returning `nodes invoke` commands, keep metadata-only `camera.list` invoke allowed, and normalize empty `photos_latest` results to a consistent response shape to prevent base64 context bloat. (#34332) Thanks @obviyus. - TUI/session-key canonicalization: normalize `openclaw tui --session` values to lowercase so uppercase session names no longer drop real-time streaming updates due to gateway/TUI key mismatches. (#33866, #34013) thanks @lynnzc. - Outbound/send config threading: pass resolved SecretRef config through outbound adapters and helper send paths so send flows do not reload unresolved runtime config. (#33987) Thanks @joshavant. diff --git a/src/agents/openclaw-tools.camera.test.ts b/src/agents/openclaw-tools.camera.test.ts index 9621c55c1..db41cd285 100644 --- a/src/agents/openclaw-tools.camera.test.ts +++ b/src/agents/openclaw-tools.camera.test.ts @@ -32,10 +32,18 @@ function unexpectedGatewayMethod(method: unknown): never { throw new Error(`unexpected method: ${String(method)}`); } -function getNodesTool(options?: { modelHasVision?: boolean }) { - const tool = createOpenClawTools( - options?.modelHasVision !== undefined ? { modelHasVision: options.modelHasVision } : {}, - ).find((candidate) => candidate.name === "nodes"); +function getNodesTool(options?: { modelHasVision?: boolean; allowMediaInvokeCommands?: boolean }) { + const toolOptions: { + modelHasVision?: boolean; + allowMediaInvokeCommands?: boolean; + } = {}; + if (options?.modelHasVision !== undefined) { + toolOptions.modelHasVision = options.modelHasVision; + } + if (options?.allowMediaInvokeCommands !== undefined) { + toolOptions.allowMediaInvokeCommands = options.allowMediaInvokeCommands; + } + const tool = createOpenClawTools(toolOptions).find((candidate) => candidate.name === "nodes"); if (!tool) { throw new Error("missing nodes tool"); } @@ -44,7 +52,7 @@ function getNodesTool(options?: { modelHasVision?: boolean }) { async function executeNodes( input: Record, - options?: { modelHasVision?: boolean }, + options?: { modelHasVision?: boolean; allowMediaInvokeCommands?: boolean }, ) { return getNodesTool(options).execute("call1", input as never); } @@ -777,4 +785,36 @@ describe("nodes invoke", () => { }), ).rejects.toThrow(/use action="photos_latest"/i); }); + + it("allows media invoke commands when explicitly enabled", async () => { + setupNodeInvokeMock({ + onInvoke: (invokeParams) => { + expect(invokeParams).toMatchObject({ + command: "photos.latest", + params: { limit: 1 }, + }); + return { + payload: { + photos: [{ format: "jpg", base64: "aGVsbG8=", width: 1, height: 1 }], + }, + }; + }, + }); + + const result = await executeNodes( + { + action: "invoke", + node: NODE_ID, + invokeCommand: "photos.latest", + invokeParamsJson: '{"limit":1}', + }, + { allowMediaInvokeCommands: true }, + ); + + expect(result.details).toMatchObject({ + payload: { + photos: [{ format: "jpg", base64: "aGVsbG8=", width: 1, height: 1 }], + }, + }); + }); }); diff --git a/src/agents/openclaw-tools.ts b/src/agents/openclaw-tools.ts index b09f78212..4373bf83c 100644 --- a/src/agents/openclaw-tools.ts +++ b/src/agents/openclaw-tools.ts @@ -60,6 +60,8 @@ export function createOpenClawTools(options?: { hasRepliedRef?: { value: boolean }; /** If true, the model has native vision capability */ modelHasVision?: boolean; + /** If true, nodes action="invoke" can call media-returning commands directly. */ + allowMediaInvokeCommands?: boolean; /** Explicit agent ID override for cron/hook sessions. */ requesterAgentIdOverride?: string; /** Require explicit message targets (no implicit last-route sends). */ @@ -137,6 +139,7 @@ export function createOpenClawTools(options?: { currentThreadTs: options?.currentThreadTs, config: options?.config, modelHasVision: options?.modelHasVision, + allowMediaInvokeCommands: options?.allowMediaInvokeCommands, }), createCronTool({ agentSessionKey: options?.agentSessionKey, diff --git a/src/agents/tools/nodes-tool.ts b/src/agents/tools/nodes-tool.ts index 6572ea412..b90d42911 100644 --- a/src/agents/tools/nodes-tool.ts +++ b/src/agents/tools/nodes-tool.ts @@ -161,6 +161,7 @@ export function createNodesTool(options?: { currentThreadTs?: string | number; config?: OpenClawConfig; modelHasVision?: boolean; + allowMediaInvokeCommands?: boolean; }): AnyAgentTool { const sessionKey = options?.agentSessionKey?.trim() || undefined; const turnSourceChannel = options?.agentChannel?.trim() || undefined; @@ -754,7 +755,7 @@ export function createNodesTool(options?: { const invokeCommandNormalized = invokeCommand.trim().toLowerCase(); const dedicatedAction = MEDIA_INVOKE_ACTIONS[invokeCommandNormalized as keyof typeof MEDIA_INVOKE_ACTIONS]; - if (dedicatedAction) { + if (dedicatedAction && !options?.allowMediaInvokeCommands) { throw new Error( `invokeCommand "${invokeCommand}" returns media payloads and is blocked to prevent base64 context bloat; use action="${dedicatedAction}"`, ); diff --git a/src/gateway/tools-invoke-http.test.ts b/src/gateway/tools-invoke-http.test.ts index 20a2f2c2c..66a68bf5d 100644 --- a/src/gateway/tools-invoke-http.test.ts +++ b/src/gateway/tools-invoke-http.test.ts @@ -335,6 +335,7 @@ describe("POST /tools/invoke", () => { const body = await res.json(); expect(body.ok).toBe(true); expect(body).toHaveProperty("result"); + expect(lastCreateOpenClawToolsContext?.allowMediaInvokeCommands).toBe(true); }); it("supports tools.alsoAllow in profile and implicit modes", async () => { diff --git a/src/gateway/tools-invoke-http.ts b/src/gateway/tools-invoke-http.ts index caf71c56c..88cea7b38 100644 --- a/src/gateway/tools-invoke-http.ts +++ b/src/gateway/tools-invoke-http.ts @@ -252,6 +252,8 @@ export async function handleToolsInvokeHttpRequest( agentAccountId: accountId, agentTo, agentThreadId, + // HTTP callers consume tool output directly; preserve raw media invoke payloads. + allowMediaInvokeCommands: true, config: cfg, pluginToolAllowlist: collectExplicitAllowlist([ profilePolicy,