From 5ca0233db05fe9166e314bd7a139e3483fe1e18d Mon Sep 17 00:00:00 2001
From: Frank Yang <frank.ekn@gmail.com>
Date: Fri, 13 Mar 2026 16:57:56 +0800
Subject: [PATCH] fix(agents): drop Anthropic thinking blocks on replay
 (#44843)

* agents: drop Anthropic thinking blocks on replay

* fix: extend anthropic replay sanitization openclaw#44429 thanks @jmcte

* fix: extend anthropic replay sanitization openclaw#44843 thanks @jmcte

* test: add bedrock replay sanitization coverage openclaw#44843

* test: cover anthropic provider drop-thinking hints openclaw#44843

---------

Co-authored-by: johnmteneyckjr <john.m.teneyck@gmail.com>
---
 CHANGELOG.md                                  |  1 +
 ...ed-runner.sanitize-session-history.test.ts | 41 +++++++++++++++----
 src/agents/pi-embedded-runner/run/attempt.ts  |  7 ++--
 src/agents/provider-capabilities.test.ts      | 26 +++++++++++-
 src/agents/provider-capabilities.ts           |  2 +
 src/agents/transcript-policy.ts               |  6 +--
 6 files changed, 67 insertions(+), 16 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index bb9fa83b5..5483519af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -95,6 +95,7 @@ Docs: https://docs.openclaw.ai
 - Telegram/Discord status reactions: show a temporary compacting reaction during auto-compaction pauses and restore thinking afterward so the bot no longer appears frozen while context is being compacted. (#35474) thanks @Cypherm.
 - Delivery/dedupe: trim completed direct-cron delivery cache correctly and keep mirrored transcript dedupe active even when transcript files contain malformed lines. (#44666) thanks @frankekn.
 - CLI/thinking help: add the missing `xhigh` level hints to `openclaw cron add`, `openclaw cron edit`, and `openclaw agent` so the help text matches the levels already accepted at runtime. (#44819) Thanks @kiki830621.
+- Agents/Anthropic replay: drop replayed assistant thinking blocks for native Anthropic and Bedrock Claude providers so persisted follow-up turns no longer fail on stored thinking blocks. (#44843) Thanks @jmcte.
 
 ## 2026.3.11
 
diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
index 57639c804..2a71e0c95 100644
--- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
+++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts
@@ -52,6 +52,21 @@ describe("sanitizeSessionHistory", () => {
       sessionId: TEST_SESSION_ID,
     });
 
+  const sanitizeAnthropicHistory = async (params: {
+    messages: AgentMessage[];
+    provider?: string;
+    modelApi?: string;
+    modelId?: string;
+  }) =>
+    sanitizeSessionHistory({
+      messages: params.messages,
+      modelApi: params.modelApi ?? "anthropic-messages",
+      provider: params.provider ?? "anthropic",
+      modelId: params.modelId ?? "claude-opus-4-6",
+      sessionManager: makeMockSessionManager(),
+      sessionId: TEST_SESSION_ID,
+    });
+
   const getAssistantMessage = (messages: AgentMessage[]) => {
     expect(messages[1]?.role).toBe("assistant");
     return messages[1] as Extract<AgentMessage, { role: "assistant" }>;
@@ -760,22 +775,30 @@ describe("sanitizeSessionHistory", () => {
     expect(types).not.toContain("thinking");
   });
 
-  it("does not drop thinking blocks for non-copilot providers", async () => {
+  it("drops assistant thinking blocks for anthropic replay", async () => {
     setNonGoogleModelApi();
 
     const messages = makeThinkingAndTextAssistantMessages();
 
-    const result = await sanitizeSessionHistory({
+    const result = await sanitizeAnthropicHistory({ messages });
+
+    const assistant = getAssistantMessage(result);
+    expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
+  });
+
+  it("drops assistant thinking blocks for amazon-bedrock replay", async () => {
+    setNonGoogleModelApi();
+
+    const messages = makeThinkingAndTextAssistantMessages();
+
+    const result = await sanitizeAnthropicHistory({
       messages,
-      modelApi: "anthropic-messages",
-      provider: "anthropic",
-      modelId: "claude-opus-4-6",
-      sessionManager: makeMockSessionManager(),
-      sessionId: TEST_SESSION_ID,
+      provider: "amazon-bedrock",
+      modelApi: "bedrock-converse-stream",
     });
 
-    const types = getAssistantContentTypes(result);
-    expect(types).toContain("thinking");
+    const assistant = getAssistantMessage(result);
+    expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
   });
 
   it("does not drop thinking blocks for non-claude copilot models", async () => {
diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts
index 3457fdf01..274ef0ef8 100644
--- a/src/agents/pi-embedded-runner/run/attempt.ts
+++ b/src/agents/pi-embedded-runner/run/attempt.ts
@@ -1947,9 +1947,10 @@ export async function runEmbeddedAttempt(
         activeSession.agent.streamFn = cacheTrace.wrapStreamFn(activeSession.agent.streamFn);
       }
 
-      // Copilot/Claude can reject persisted `thinking` blocks (e.g. thinkingSignature:"reasoning_text")
-      // on *any* follow-up provider call (including tool continuations). Wrap the stream function
-      // so every outbound request sees sanitized messages.
+      // Anthropic Claude endpoints can reject replayed `thinking` blocks
+      // (e.g. thinkingSignature:"reasoning_text") on any follow-up provider
+      // call, including tool continuations. Wrap the stream function so every
+      // outbound request sees sanitized messages.
       if (transcriptPolicy.dropThinkingBlocks) {
         const inner = activeSession.agent.streamFn;
         activeSession.agent.streamFn = (model, context, options) => {
diff --git a/src/agents/provider-capabilities.test.ts b/src/agents/provider-capabilities.test.ts
index 90d2b52ff..ef59f025d 100644
--- a/src/agents/provider-capabilities.test.ts
+++ b/src/agents/provider-capabilities.test.ts
@@ -22,7 +22,19 @@ describe("resolveProviderCapabilities", () => {
       transcriptToolCallIdMode: "default",
       transcriptToolCallIdModelHints: [],
       geminiThoughtSignatureModelHints: [],
-      dropThinkingBlockModelHints: [],
+      dropThinkingBlockModelHints: ["claude"],
+    });
+    expect(resolveProviderCapabilities("amazon-bedrock")).toEqual({
+      anthropicToolSchemaMode: "native",
+      anthropicToolChoiceMode: "native",
+      providerFamily: "anthropic",
+      preserveAnthropicThinkingSignatures: true,
+      openAiCompatTurnValidation: true,
+      geminiThoughtSignatureSanitization: false,
+      transcriptToolCallIdMode: "default",
+      transcriptToolCallIdModelHints: [],
+      geminiThoughtSignatureModelHints: [],
+      dropThinkingBlockModelHints: ["claude"],
     });
   });
 
@@ -82,6 +94,18 @@ describe("resolveProviderCapabilities", () => {
   it("tracks provider families and model-specific transcript quirks in the registry", () => {
     expect(isOpenAiProviderFamily("openai")).toBe(true);
     expect(isAnthropicProviderFamily("amazon-bedrock")).toBe(true);
+    expect(
+      shouldDropThinkingBlocksForModel({
+        provider: "anthropic",
+        modelId: "claude-opus-4-6",
+      }),
+    ).toBe(true);
+    expect(
+      shouldDropThinkingBlocksForModel({
+        provider: "amazon-bedrock",
+        modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
+      }),
+    ).toBe(true);
     expect(
       shouldDropThinkingBlocksForModel({
         provider: "github-copilot",
diff --git a/src/agents/provider-capabilities.ts b/src/agents/provider-capabilities.ts
index 27aadbcd7..f443fac4d 100644
--- a/src/agents/provider-capabilities.ts
+++ b/src/agents/provider-capabilities.ts
@@ -29,9 +29,11 @@ const DEFAULT_PROVIDER_CAPABILITIES: ProviderCapabilities = {
 const PROVIDER_CAPABILITIES: Record<string, Partial<ProviderCapabilities>> = {
   anthropic: {
     providerFamily: "anthropic",
+    dropThinkingBlockModelHints: ["claude"],
   },
   "amazon-bedrock": {
     providerFamily: "anthropic",
+    dropThinkingBlockModelHints: ["claude"],
   },
   // kimi-coding natively supports Anthropic tool framing (input_schema);
   // converting to OpenAI format causes XML text fallback instead of tool_use blocks.
diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts
index d6d9ec591..46795bad1 100644
--- a/src/agents/transcript-policy.ts
+++ b/src/agents/transcript-policy.ts
@@ -80,9 +80,9 @@ export function resolveTranscriptPolicy(params: {
     });
   const requiresOpenAiCompatibleToolIdSanitization = params.modelApi === "openai-completions";
 
-  // GitHub Copilot's Claude endpoints can reject persisted `thinking` blocks with
-  // non-binary/non-base64 signatures (e.g. thinkingSignature: "reasoning_text").
-  // Drop these blocks at send-time to keep sessions usable.
+  // Anthropic Claude endpoints can reject replayed `thinking` blocks unless the
+  // original signatures are preserved byte-for-byte. Drop them at send-time to
+  // keep persisted sessions usable across follow-up turns.
   const dropThinkingBlocks = shouldDropThinkingBlocksForModel({ provider, modelId });
 
   const needsNonImageSanitize =