diff --git a/CHANGELOG.md b/CHANGELOG.md index 9616853b2..06bde128a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai - TUI/Input: enable multiline-paste burst coalescing on macOS Terminal.app and iTerm so pasted blocks no longer submit line-by-line as separate messages. (#18809) Thanks @fwends. - TUI/Status: request immediate renders after setting `sending`/`waiting` activity states so in-flight runs always show visible progress indicators instead of appearing idle until completion. (#21549) Thanks @13Guinness. - Agents/Fallbacks: treat JSON payloads with `type: "api_error"` + `"Internal server error"` as transient failover errors so Anthropic 500-style failures trigger model fallback. (#23193) Thanks @jarvis-lane. +- Agents/Transcripts: validate assistant tool-call names (syntax/length + registered tool allowlist) before transcript persistence and during replay sanitization so malformed failover tool names no longer poison sessions with repeated provider HTTP 400 errors. (#23324) Thanks @johnsantry. - Agents/Diagnostics: include resolved lifecycle error text in `embedded run agent end` warnings so UI/TUI “Connection error” runs expose actionable provider failure reasons in gateway logs. (#23054) Thanks @Raize. - Plugins/Hooks: run legacy `before_agent_start` once per agent turn and reuse that result across model-resolve and prompt-build compatibility paths, preventing duplicate hook side effects (for example duplicate external API calls). (#23289) Thanks @ksato8710. - Models/Config: default missing Anthropic provider/model `api` fields to `anthropic-messages` during config validation so custom relay model entries are preserved instead of being dropped by runtime model registry validation. (#23332) Thanks @bigbigmonkey123. diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 665e98798..d72589628 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -203,6 +203,54 @@ describe("sanitizeSessionHistory", () => { expect(result.map((msg) => msg.role)).toEqual(["user"]); }); + it("drops malformed tool calls with invalid/overlong names", async () => { + const messages = [ + { + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_bad", + name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"', + arguments: {}, + }, + { type: "toolCall", id: "call_long", name: `read_${"x".repeat(80)}`, arguments: {} }, + ], + }, + { role: "user", content: "hello" }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + sessionManager: mockSessionManager, + sessionId: TEST_SESSION_ID, + }); + + expect(result.map((msg) => msg.role)).toEqual(["user"]); + }); + + it("drops tool calls that are not in the allowed tool set", async () => { + const messages = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }], + }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + allowedToolNames: ["read"], + sessionManager: mockSessionManager, + sessionId: TEST_SESSION_ID, + }); + + expect(result).toEqual([]); + }); + it("downgrades orphaned openai reasoning even when the model has not changed", async () => { const sessionEntries = [ makeModelSnapshotEntry({ diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 865cdd5c7..ffb42c6e2 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -78,6 +78,7 @@ import { buildEmbeddedSystemPrompt, createSystemPromptOverride, } from "./system-prompt.js"; +import { collectAllowedToolNames } from "./tool-name-allowlist.js"; import { splitSdkTools } from "./tool-split.js"; import type { EmbeddedPiCompactResult } from "./types.js"; import { describeUnknownError, mapThinkingLevel } from "./utils.js"; @@ -383,6 +384,7 @@ export async function compactEmbeddedPiSessionDirect( modelAuthMode: resolveModelAuthMode(model.provider, params.config), }); const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider }); + const allowedToolNames = collectAllowedToolNames({ tools }); logToolSchemasForGoogle({ tools, provider }); const machineName = await getMachineDisplayName(); const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider); @@ -532,6 +534,7 @@ export async function compactEmbeddedPiSessionDirect( agentId: sessionAgentId, sessionKey: params.sessionKey, allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + allowedToolNames, }); trackSessionManagerAccess(params.sessionFile); const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir); @@ -587,6 +590,7 @@ export async function compactEmbeddedPiSessionDirect( modelApi: model.api, modelId, provider, + allowedToolNames, config: params.config, sessionManager, sessionId: params.sessionId, diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index f9c6c2c64..544d45f29 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -426,6 +426,7 @@ export async function sanitizeSessionHistory(params: { modelApi?: string | null; modelId?: string; provider?: string; + allowedToolNames?: Iterable; config?: OpenClawConfig; sessionManager: SessionManager; sessionId: string; @@ -458,7 +459,9 @@ export async function sanitizeSessionHistory(params: { const sanitizedThinking = policy.sanitizeThinkingSignatures ? sanitizeAntigravityThinkingBlocks(droppedThinking) : droppedThinking; - const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking); + const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking, { + allowedToolNames: params.allowedToolNames, + }); const repairedTools = policy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(sanitizedToolCalls) : sanitizedToolCalls; diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index d967c5f15..0ddc8899a 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -105,6 +105,7 @@ import { createSystemPromptOverride, } from "../system-prompt.js"; import { dropThinkingBlocks } from "../thinking.js"; +import { collectAllowedToolNames } from "../tool-name-allowlist.js"; import { installToolResultContextGuard } from "../tool-result-context-guard.js"; import { splitSdkTools } from "../tool-split.js"; import { describeUnknownError, mapThinkingLevel } from "../utils.js"; @@ -395,6 +396,10 @@ export async function runEmbeddedAttempt( disableMessageTool: params.disableMessageTool, }); const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider }); + const allowedToolNames = collectAllowedToolNames({ + tools, + clientTools: params.clientTools, + }); logToolSchemasForGoogle({ tools, provider: params.provider }); const machineName = await getMachineDisplayName(); @@ -591,6 +596,7 @@ export async function runEmbeddedAttempt( sessionKey: params.sessionKey, inputProvenance: params.inputProvenance, allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + allowedToolNames, }); trackSessionManagerAccess(params.sessionFile); @@ -777,6 +783,7 @@ export async function runEmbeddedAttempt( modelApi: params.model.api, modelId: params.modelId, provider: params.provider, + allowedToolNames, config: params.config, sessionManager, sessionId: params.sessionId, diff --git a/src/agents/pi-embedded-runner/tool-name-allowlist.ts b/src/agents/pi-embedded-runner/tool-name-allowlist.ts new file mode 100644 index 000000000..ca3b12234 --- /dev/null +++ b/src/agents/pi-embedded-runner/tool-name-allowlist.ts @@ -0,0 +1,26 @@ +import type { AgentTool } from "@mariozechner/pi-agent-core"; +import type { ClientToolDefinition } from "./run/params.js"; + +function addName(names: Set, value: unknown): void { + if (typeof value !== "string") { + return; + } + const trimmed = value.trim(); + if (trimmed) { + names.add(trimmed); + } +} + +export function collectAllowedToolNames(params: { + tools: AgentTool[]; + clientTools?: ClientToolDefinition[]; +}): Set { + const names = new Set(); + for (const tool of params.tools) { + addName(names, tool.name); + } + for (const tool of params.clientTools ?? []) { + addName(names, tool.function?.name); + } + return names; +} diff --git a/src/agents/session-tool-result-guard-wrapper.ts b/src/agents/session-tool-result-guard-wrapper.ts index 896680234..8570bdd16 100644 --- a/src/agents/session-tool-result-guard-wrapper.ts +++ b/src/agents/session-tool-result-guard-wrapper.ts @@ -22,6 +22,7 @@ export function guardSessionManager( sessionKey?: string; inputProvenance?: InputProvenance; allowSyntheticToolResults?: boolean; + allowedToolNames?: Iterable; }, ): GuardedSessionManager { if (typeof (sessionManager as GuardedSessionManager).flushPendingToolResults === "function") { @@ -64,6 +65,7 @@ export function guardSessionManager( applyInputProvenanceToUserMessage(message, opts?.inputProvenance), transformToolResultForPersistence: transform, allowSyntheticToolResults: opts?.allowSyntheticToolResults, + allowedToolNames: opts?.allowedToolNames, beforeMessageWriteHook: beforeMessageWrite, }); (sessionManager as GuardedSessionManager).flushPendingToolResults = guard.flushPendingToolResults; diff --git a/src/agents/session-tool-result-guard.e2e.test.ts b/src/agents/session-tool-result-guard.e2e.test.ts index 37cf5c96e..7b6566066 100644 --- a/src/agents/session-tool-result-guard.e2e.test.ts +++ b/src/agents/session-tool-result-guard.e2e.test.ts @@ -191,6 +191,43 @@ describe("installSessionToolResultGuard", () => { expect(messages).toHaveLength(0); }); + it("drops malformed tool calls with invalid name tokens before persistence", () => { + const sm = SessionManager.inMemory(); + installSessionToolResultGuard(sm); + + sm.appendMessage( + asAppendMessage({ + role: "assistant", + content: [ + { + type: "toolCall", + id: "call_bad_name", + name: 'toolu_01mvznfebfuu <|tool_call_argument_begin|> {"command"', + arguments: {}, + }, + ], + }), + ); + + expect(getPersistedMessages(sm)).toHaveLength(0); + }); + + it("drops tool calls not present in allowedToolNames", () => { + const sm = SessionManager.inMemory(); + installSessionToolResultGuard(sm, { + allowedToolNames: ["read"], + }); + + sm.appendMessage( + asAppendMessage({ + role: "assistant", + content: [{ type: "toolCall", id: "call_1", name: "write", arguments: {} }], + }), + ); + + expect(getPersistedMessages(sm)).toHaveLength(0); + }); + it("flushes pending tool results when a sanitized assistant message is dropped", () => { const sm = SessionManager.inMemory(); installSessionToolResultGuard(sm); diff --git a/src/agents/session-tool-result-guard.ts b/src/agents/session-tool-result-guard.ts index 0f82cd2d4..016199178 100644 --- a/src/agents/session-tool-result-guard.ts +++ b/src/agents/session-tool-result-guard.ts @@ -96,6 +96,11 @@ export function installSessionToolResultGuard( * Defaults to true. */ allowSyntheticToolResults?: boolean; + /** + * Optional set/list of tool names accepted for assistant toolCall/toolUse blocks. + * When set, tool calls with unknown names are dropped before persistence. + */ + allowedToolNames?: Iterable; /** * Synchronous hook invoked before any message is written to the session JSONL. * If the hook returns { block: true }, the message is silently dropped. @@ -171,7 +176,9 @@ export function installSessionToolResultGuard( let nextMessage = message; const role = (message as { role?: unknown }).role; if (role === "assistant") { - const sanitized = sanitizeToolCallInputs([message]); + const sanitized = sanitizeToolCallInputs([message], { + allowedToolNames: opts?.allowedToolNames, + }); if (sanitized.length === 0) { if (allowSyntheticToolResults && pending.size > 0) { flushPendingToolResults(); diff --git a/src/agents/session-transcript-repair.e2e.test.ts b/src/agents/session-transcript-repair.e2e.test.ts index de988edf6..68797cfee 100644 --- a/src/agents/session-transcript-repair.e2e.test.ts +++ b/src/agents/session-transcript-repair.e2e.test.ts @@ -241,6 +241,65 @@ describe("sanitizeToolCallInputs", () => { expect((toolCalls[0] as { id?: unknown }).id).toBe("call_ok"); }); + it("drops tool calls with malformed or overlong names", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_ok", name: "read", arguments: {} }, + { + type: "toolCall", + id: "call_bad_chars", + name: 'toolu_01abc <|tool_call_argument_begin|> {"command"', + arguments: {}, + }, + { + type: "toolUse", + id: "call_too_long", + name: `read_${"x".repeat(80)}`, + input: {}, + }, + ], + }, + ] as unknown as AgentMessage[]; + + const out = sanitizeToolCallInputs(input); + const assistant = out[0] as Extract; + const toolCalls = Array.isArray(assistant.content) + ? assistant.content.filter((block) => { + const type = (block as { type?: unknown }).type; + return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type); + }) + : []; + + expect(toolCalls).toHaveLength(1); + expect((toolCalls[0] as { name?: unknown }).name).toBe("read"); + }); + + it("drops unknown tool names when an allowlist is provided", () => { + const input = [ + { + role: "assistant", + content: [ + { type: "toolCall", id: "call_ok", name: "read", arguments: {} }, + { type: "toolCall", id: "call_unknown", name: "write", arguments: {} }, + ], + }, + ] as unknown as AgentMessage[]; + + const out = sanitizeToolCallInputs(input, { allowedToolNames: ["read"] }); + const assistant = out[0] as Extract; + const toolCalls = Array.isArray(assistant.content) + ? assistant.content.filter((block) => { + const type = (block as { type?: unknown }).type; + return typeof type === "string" && ["toolCall", "toolUse", "functionCall"].includes(type); + }) + : []; + + expect(toolCalls).toHaveLength(1); + expect((toolCalls[0] as { name?: unknown }).name).toBe("read"); + }); + it("keeps valid tool calls and preserves text blocks", () => { const input = [ { diff --git a/src/agents/session-transcript-repair.ts b/src/agents/session-transcript-repair.ts index 5dad80241..31b962487 100644 --- a/src/agents/session-transcript-repair.ts +++ b/src/agents/session-transcript-repair.ts @@ -1,6 +1,9 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { extractToolCallsFromAssistant, extractToolResultId } from "./tool-call-id.js"; +const TOOL_CALL_NAME_MAX_CHARS = 64; +const TOOL_CALL_NAME_RE = /^[A-Za-z0-9_-]+$/; + type ToolCallBlock = { type?: unknown; id?: unknown; @@ -35,8 +38,38 @@ function hasToolCallId(block: ToolCallBlock): boolean { return hasNonEmptyStringField(block.id); } -function hasToolCallName(block: ToolCallBlock): boolean { - return hasNonEmptyStringField(block.name); +function normalizeAllowedToolNames(allowedToolNames?: Iterable): Set | null { + if (!allowedToolNames) { + return null; + } + const normalized = new Set(); + for (const name of allowedToolNames) { + if (typeof name !== "string") { + continue; + } + const trimmed = name.trim(); + if (trimmed) { + normalized.add(trimmed.toLowerCase()); + } + } + return normalized.size > 0 ? normalized : null; +} + +function hasToolCallName(block: ToolCallBlock, allowedToolNames: Set | null): boolean { + if (typeof block.name !== "string") { + return false; + } + const trimmed = block.name.trim(); + if (!trimmed || trimmed !== block.name) { + return false; + } + if (trimmed.length > TOOL_CALL_NAME_MAX_CHARS || !TOOL_CALL_NAME_RE.test(trimmed)) { + return false; + } + if (!allowedToolNames) { + return true; + } + return allowedToolNames.has(trimmed.toLowerCase()); } function makeMissingToolResult(params: { @@ -66,6 +99,10 @@ export type ToolCallInputRepairReport = { droppedAssistantMessages: number; }; +export type ToolCallInputRepairOptions = { + allowedToolNames?: Iterable; +}; + export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] { let touched = false; const out: AgentMessage[] = []; @@ -85,11 +122,15 @@ export function stripToolResultDetails(messages: AgentMessage[]): AgentMessage[] return touched ? out : messages; } -export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRepairReport { +export function repairToolCallInputs( + messages: AgentMessage[], + options?: ToolCallInputRepairOptions, +): ToolCallInputRepairReport { let droppedToolCalls = 0; let droppedAssistantMessages = 0; let changed = false; const out: AgentMessage[] = []; + const allowedToolNames = normalizeAllowedToolNames(options?.allowedToolNames); for (const msg of messages) { if (!msg || typeof msg !== "object") { @@ -108,7 +149,9 @@ export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRep for (const block of msg.content) { if ( isToolCallBlock(block) && - (!hasToolCallInput(block) || !hasToolCallId(block) || !hasToolCallName(block)) + (!hasToolCallInput(block) || + !hasToolCallId(block) || + !hasToolCallName(block, allowedToolNames)) ) { droppedToolCalls += 1; droppedInMessage += 1; @@ -138,8 +181,11 @@ export function repairToolCallInputs(messages: AgentMessage[]): ToolCallInputRep }; } -export function sanitizeToolCallInputs(messages: AgentMessage[]): AgentMessage[] { - return repairToolCallInputs(messages).messages; +export function sanitizeToolCallInputs( + messages: AgentMessage[], + options?: ToolCallInputRepairOptions, +): AgentMessage[] { + return repairToolCallInputs(messages, options).messages; } export function sanitizeToolUseResultPairing(messages: AgentMessage[]): AgentMessage[] {