From 5ca0233db05fe9166e314bd7a139e3483fe1e18d Mon Sep 17 00:00:00 2001 From: Frank Yang Date: Fri, 13 Mar 2026 16:57:56 +0800 Subject: [PATCH] fix(agents): drop Anthropic thinking blocks on replay (#44843) * agents: drop Anthropic thinking blocks on replay * fix: extend anthropic replay sanitization openclaw#44429 thanks @jmcte * fix: extend anthropic replay sanitization openclaw#44843 thanks @jmcte * test: add bedrock replay sanitization coverage openclaw#44843 * test: cover anthropic provider drop-thinking hints openclaw#44843 --------- Co-authored-by: johnmteneyckjr --- CHANGELOG.md | 1 + ...ed-runner.sanitize-session-history.test.ts | 41 +++++++++++++++---- src/agents/pi-embedded-runner/run/attempt.ts | 7 ++-- src/agents/provider-capabilities.test.ts | 26 +++++++++++- src/agents/provider-capabilities.ts | 2 + src/agents/transcript-policy.ts | 6 +-- 6 files changed, 67 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bb9fa83b5..5483519af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -95,6 +95,7 @@ Docs: https://docs.openclaw.ai - Telegram/Discord status reactions: show a temporary compacting reaction during auto-compaction pauses and restore thinking afterward so the bot no longer appears frozen while context is being compacted. (#35474) thanks @Cypherm. - Delivery/dedupe: trim completed direct-cron delivery cache correctly and keep mirrored transcript dedupe active even when transcript files contain malformed lines. (#44666) thanks @frankekn. - CLI/thinking help: add the missing `xhigh` level hints to `openclaw cron add`, `openclaw cron edit`, and `openclaw agent` so the help text matches the levels already accepted at runtime. (#44819) Thanks @kiki830621. +- Agents/Anthropic replay: drop replayed assistant thinking blocks for native Anthropic and Bedrock Claude providers so persisted follow-up turns no longer fail on stored thinking blocks. (#44843) Thanks @jmcte. 
## 2026.3.11 diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 57639c804..2a71e0c95 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -52,6 +52,21 @@ describe("sanitizeSessionHistory", () => { sessionId: TEST_SESSION_ID, }); + const sanitizeAnthropicHistory = async (params: { + messages: AgentMessage[]; + provider?: string; + modelApi?: string; + modelId?: string; + }) => + sanitizeSessionHistory({ + messages: params.messages, + modelApi: params.modelApi ?? "anthropic-messages", + provider: params.provider ?? "anthropic", + modelId: params.modelId ?? "claude-opus-4-6", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + const getAssistantMessage = (messages: AgentMessage[]) => { expect(messages[1]?.role).toBe("assistant"); return messages[1] as Extract<AgentMessage, { role: "assistant" }>; @@ -760,22 +775,30 @@ describe("sanitizeSessionHistory", () => { expect(types).not.toContain("thinking"); }); - it("does not drop thinking blocks for non-copilot providers", async () => { + it("drops assistant thinking blocks for anthropic replay", async () => { setNonGoogleModelApi(); const messages = makeThinkingAndTextAssistantMessages(); - const result = await sanitizeSessionHistory({ + const result = await sanitizeAnthropicHistory({ messages }); + + const assistant = getAssistantMessage(result); + expect(assistant.content).toEqual([{ type: "text", text: "hi" }]); + }); + + it("drops assistant thinking blocks for amazon-bedrock replay", async () => { + setNonGoogleModelApi(); + + const messages = makeThinkingAndTextAssistantMessages(); + + const result = await sanitizeAnthropicHistory({ messages, - modelApi: "anthropic-messages", - provider: "anthropic", - modelId: "claude-opus-4-6", - sessionManager: makeMockSessionManager(), - sessionId: TEST_SESSION_ID, + provider: "amazon-bedrock", + 
modelApi: "bedrock-converse-stream", }); - const types = getAssistantContentTypes(result); - expect(types).toContain("thinking"); + const assistant = getAssistantMessage(result); + expect(assistant.content).toEqual([{ type: "text", text: "hi" }]); }); it("does not drop thinking blocks for non-claude copilot models", async () => { diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 3457fdf01..274ef0ef8 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -1947,9 +1947,10 @@ export async function runEmbeddedAttempt( activeSession.agent.streamFn = cacheTrace.wrapStreamFn(activeSession.agent.streamFn); } - // Copilot/Claude can reject persisted `thinking` blocks (e.g. thinkingSignature:"reasoning_text") - // on *any* follow-up provider call (including tool continuations). Wrap the stream function - // so every outbound request sees sanitized messages. + // Anthropic Claude endpoints can reject replayed `thinking` blocks + // (e.g. thinkingSignature:"reasoning_text") on any follow-up provider + // call, including tool continuations. Wrap the stream function so every + // outbound request sees sanitized messages. 
if (transcriptPolicy.dropThinkingBlocks) { const inner = activeSession.agent.streamFn; activeSession.agent.streamFn = (model, context, options) => { diff --git a/src/agents/provider-capabilities.test.ts b/src/agents/provider-capabilities.test.ts index 90d2b52ff..ef59f025d 100644 --- a/src/agents/provider-capabilities.test.ts +++ b/src/agents/provider-capabilities.test.ts @@ -22,7 +22,19 @@ describe("resolveProviderCapabilities", () => { transcriptToolCallIdMode: "default", transcriptToolCallIdModelHints: [], geminiThoughtSignatureModelHints: [], - dropThinkingBlockModelHints: [], + dropThinkingBlockModelHints: ["claude"], + }); + expect(resolveProviderCapabilities("amazon-bedrock")).toEqual({ + anthropicToolSchemaMode: "native", + anthropicToolChoiceMode: "native", + providerFamily: "anthropic", + preserveAnthropicThinkingSignatures: true, + openAiCompatTurnValidation: true, + geminiThoughtSignatureSanitization: false, + transcriptToolCallIdMode: "default", + transcriptToolCallIdModelHints: [], + geminiThoughtSignatureModelHints: [], + dropThinkingBlockModelHints: ["claude"], }); }); @@ -82,6 +94,18 @@ describe("resolveProviderCapabilities", () => { it("tracks provider families and model-specific transcript quirks in the registry", () => { expect(isOpenAiProviderFamily("openai")).toBe(true); expect(isAnthropicProviderFamily("amazon-bedrock")).toBe(true); + expect( + shouldDropThinkingBlocksForModel({ + provider: "anthropic", + modelId: "claude-opus-4-6", + }), + ).toBe(true); + expect( + shouldDropThinkingBlocksForModel({ + provider: "amazon-bedrock", + modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0", + }), + ).toBe(true); expect( shouldDropThinkingBlocksForModel({ provider: "github-copilot", diff --git a/src/agents/provider-capabilities.ts b/src/agents/provider-capabilities.ts index 27aadbcd7..f443fac4d 100644 --- a/src/agents/provider-capabilities.ts +++ b/src/agents/provider-capabilities.ts @@ -29,9 +29,11 @@ const DEFAULT_PROVIDER_CAPABILITIES: 
ProviderCapabilities = { const PROVIDER_CAPABILITIES: Record<string, Partial<ProviderCapabilities>> = { anthropic: { providerFamily: "anthropic", + dropThinkingBlockModelHints: ["claude"], }, "amazon-bedrock": { providerFamily: "anthropic", + dropThinkingBlockModelHints: ["claude"], }, // kimi-coding natively supports Anthropic tool framing (input_schema); // converting to OpenAI format causes XML text fallback instead of tool_use blocks. diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index d6d9ec591..46795bad1 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -80,9 +80,9 @@ export function resolveTranscriptPolicy(params: { }); const requiresOpenAiCompatibleToolIdSanitization = params.modelApi === "openai-completions"; - // GitHub Copilot's Claude endpoints can reject persisted `thinking` blocks with - // non-binary/non-base64 signatures (e.g. thinkingSignature: "reasoning_text"). - // Drop these blocks at send-time to keep sessions usable. + // Anthropic Claude endpoints can reject replayed `thinking` blocks unless the + // original signatures are preserved byte-for-byte. Drop them at send-time to + // keep persisted sessions usable across follow-up turns. const dropThinkingBlocks = shouldDropThinkingBlocksForModel({ provider, modelId }); const needsNonImageSanitize =