fix(agents): drop Anthropic thinking blocks on replay (#44843)
* agents: drop Anthropic thinking blocks on replay
* fix: extend anthropic replay sanitization openclaw#44429 thanks @jmcte
* fix: extend anthropic replay sanitization openclaw#44843 thanks @jmcte
* test: add bedrock replay sanitization coverage openclaw#44843
* test: cover anthropic provider drop-thinking hints openclaw#44843

---------

Co-authored-by: johnmteneyckjr <john.m.teneyck@gmail.com>
This commit is contained in:
@@ -95,6 +95,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Telegram/Discord status reactions: show a temporary compacting reaction during auto-compaction pauses and restore thinking afterward so the bot no longer appears frozen while context is being compacted. (#35474) thanks @Cypherm.
|
||||
- Delivery/dedupe: trim completed direct-cron delivery cache correctly and keep mirrored transcript dedupe active even when transcript files contain malformed lines. (#44666) thanks @frankekn.
|
||||
- CLI/thinking help: add the missing `xhigh` level hints to `openclaw cron add`, `openclaw cron edit`, and `openclaw agent` so the help text matches the levels already accepted at runtime. (#44819) Thanks @kiki830621.
|
||||
- Agents/Anthropic replay: drop replayed assistant thinking blocks for native Anthropic and Bedrock Claude providers so persisted follow-up turns no longer fail on stored thinking blocks. (#44843) Thanks @jmcte.
|
||||
|
||||
## 2026.3.11
|
||||
|
||||
|
||||
@@ -52,6 +52,21 @@ describe("sanitizeSessionHistory", () => {
|
||||
sessionId: TEST_SESSION_ID,
|
||||
});
|
||||
|
||||
// Test helper: forwards `messages` to sanitizeSessionHistory together with a
// mock session manager and the shared test session id. Any of `provider`,
// `modelApi`, or `modelId` that the caller leaves out falls back to the
// "anthropic" / "anthropic-messages" / "claude-opus-4-6" defaults below, so a
// test only has to spell out the fields it wants to vary.
const sanitizeAnthropicHistory = async (params: {
  messages: AgentMessage[];
  provider?: string;
  modelApi?: string;
  modelId?: string;
}) => {
  const { messages, provider, modelApi, modelId } = params;
  return sanitizeSessionHistory({
    messages,
    modelApi: modelApi ?? "anthropic-messages",
    provider: provider ?? "anthropic",
    modelId: modelId ?? "claude-opus-4-6",
    sessionManager: makeMockSessionManager(),
    sessionId: TEST_SESSION_ID,
  });
};
|
||||
|
||||
const getAssistantMessage = (messages: AgentMessage[]) => {
|
||||
expect(messages[1]?.role).toBe("assistant");
|
||||
return messages[1] as Extract<AgentMessage, { role: "assistant" }>;
|
||||
@@ -760,22 +775,30 @@ describe("sanitizeSessionHistory", () => {
|
||||
expect(types).not.toContain("thinking");
|
||||
});
|
||||
|
||||
it("does not drop thinking blocks for non-copilot providers", async () => {
|
||||
it("drops assistant thinking blocks for anthropic replay", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = makeThinkingAndTextAssistantMessages();
|
||||
|
||||
const result = await sanitizeSessionHistory({
|
||||
const result = await sanitizeAnthropicHistory({ messages });
|
||||
|
||||
const assistant = getAssistantMessage(result);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
|
||||
});
|
||||
|
||||
it("drops assistant thinking blocks for amazon-bedrock replay", async () => {
|
||||
setNonGoogleModelApi();
|
||||
|
||||
const messages = makeThinkingAndTextAssistantMessages();
|
||||
|
||||
const result = await sanitizeAnthropicHistory({
|
||||
messages,
|
||||
modelApi: "anthropic-messages",
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-6",
|
||||
sessionManager: makeMockSessionManager(),
|
||||
sessionId: TEST_SESSION_ID,
|
||||
provider: "amazon-bedrock",
|
||||
modelApi: "bedrock-converse-stream",
|
||||
});
|
||||
|
||||
const types = getAssistantContentTypes(result);
|
||||
expect(types).toContain("thinking");
|
||||
const assistant = getAssistantMessage(result);
|
||||
expect(assistant.content).toEqual([{ type: "text", text: "hi" }]);
|
||||
});
|
||||
|
||||
it("does not drop thinking blocks for non-claude copilot models", async () => {
|
||||
|
||||
@@ -1947,9 +1947,10 @@ export async function runEmbeddedAttempt(
|
||||
activeSession.agent.streamFn = cacheTrace.wrapStreamFn(activeSession.agent.streamFn);
|
||||
}
|
||||
|
||||
// Copilot/Claude can reject persisted `thinking` blocks (e.g. thinkingSignature:"reasoning_text")
|
||||
// on *any* follow-up provider call (including tool continuations). Wrap the stream function
|
||||
// so every outbound request sees sanitized messages.
|
||||
// Anthropic Claude endpoints can reject replayed `thinking` blocks
|
||||
// (e.g. thinkingSignature:"reasoning_text") on any follow-up provider
|
||||
// call, including tool continuations. Wrap the stream function so every
|
||||
// outbound request sees sanitized messages.
|
||||
if (transcriptPolicy.dropThinkingBlocks) {
|
||||
const inner = activeSession.agent.streamFn;
|
||||
activeSession.agent.streamFn = (model, context, options) => {
|
||||
|
||||
@@ -22,7 +22,19 @@ describe("resolveProviderCapabilities", () => {
|
||||
transcriptToolCallIdMode: "default",
|
||||
transcriptToolCallIdModelHints: [],
|
||||
geminiThoughtSignatureModelHints: [],
|
||||
dropThinkingBlockModelHints: [],
|
||||
dropThinkingBlockModelHints: ["claude"],
|
||||
});
|
||||
expect(resolveProviderCapabilities("amazon-bedrock")).toEqual({
|
||||
anthropicToolSchemaMode: "native",
|
||||
anthropicToolChoiceMode: "native",
|
||||
providerFamily: "anthropic",
|
||||
preserveAnthropicThinkingSignatures: true,
|
||||
openAiCompatTurnValidation: true,
|
||||
geminiThoughtSignatureSanitization: false,
|
||||
transcriptToolCallIdMode: "default",
|
||||
transcriptToolCallIdModelHints: [],
|
||||
geminiThoughtSignatureModelHints: [],
|
||||
dropThinkingBlockModelHints: ["claude"],
|
||||
});
|
||||
});
|
||||
|
||||
@@ -82,6 +94,18 @@ describe("resolveProviderCapabilities", () => {
|
||||
it("tracks provider families and model-specific transcript quirks in the registry", () => {
|
||||
expect(isOpenAiProviderFamily("openai")).toBe(true);
|
||||
expect(isAnthropicProviderFamily("amazon-bedrock")).toBe(true);
|
||||
expect(
|
||||
shouldDropThinkingBlocksForModel({
|
||||
provider: "anthropic",
|
||||
modelId: "claude-opus-4-6",
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
shouldDropThinkingBlocksForModel({
|
||||
provider: "amazon-bedrock",
|
||||
modelId: "anthropic.claude-3-5-sonnet-20241022-v2:0",
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
shouldDropThinkingBlocksForModel({
|
||||
provider: "github-copilot",
|
||||
|
||||
@@ -29,9 +29,11 @@ const DEFAULT_PROVIDER_CAPABILITIES: ProviderCapabilities = {
|
||||
const PROVIDER_CAPABILITIES: Record<string, Partial<ProviderCapabilities>> = {
|
||||
anthropic: {
|
||||
providerFamily: "anthropic",
|
||||
dropThinkingBlockModelHints: ["claude"],
|
||||
},
|
||||
"amazon-bedrock": {
|
||||
providerFamily: "anthropic",
|
||||
dropThinkingBlockModelHints: ["claude"],
|
||||
},
|
||||
// kimi-coding natively supports Anthropic tool framing (input_schema);
|
||||
// converting to OpenAI format causes XML text fallback instead of tool_use blocks.
|
||||
|
||||
@@ -80,9 +80,9 @@ export function resolveTranscriptPolicy(params: {
|
||||
});
|
||||
const requiresOpenAiCompatibleToolIdSanitization = params.modelApi === "openai-completions";
|
||||
|
||||
// GitHub Copilot's Claude endpoints can reject persisted `thinking` blocks with
|
||||
// non-binary/non-base64 signatures (e.g. thinkingSignature: "reasoning_text").
|
||||
// Drop these blocks at send-time to keep sessions usable.
|
||||
// Anthropic Claude endpoints can reject replayed `thinking` blocks unless the
|
||||
// original signatures are preserved byte-for-byte. Drop them at send-time to
|
||||
// keep persisted sessions usable across follow-up turns.
|
||||
const dropThinkingBlocks = shouldDropThinkingBlocksForModel({ provider, modelId });
|
||||
|
||||
const needsNonImageSanitize =
|
||||
|
||||
Reference in New Issue
Block a user