From feccac672313b33ff6d7cf31f05b2fb38e9fd03d Mon Sep 17 00:00:00 2001 From: jackheuberger <7830838+jackheuberger@users.noreply.github.com> Date: Fri, 20 Feb 2026 19:48:09 -0600 Subject: [PATCH] fix: sanitize thinking blocks for GitHub Copilot Claude models (openclaw#19459) thanks @jackheuberger Verified: - pnpm build - pnpm check - pnpm test:macmini Co-authored-by: jackheuberger <12731288+jackheuberger@users.noreply.github.com> Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com> --- CHANGELOG.md | 1 + ...ed-runner.sanitize-session-history.test.ts | 168 ++++++++++++++++++ src/agents/pi-embedded-runner/google.ts | 9 +- src/agents/pi-embedded-runner/run/attempt.ts | 27 ++- src/agents/pi-embedded-runner/thinking.ts | 47 +++++ src/agents/transcript-policy.ts | 15 +- 6 files changed, 261 insertions(+), 6 deletions(-) create mode 100644 src/agents/pi-embedded-runner/thinking.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 06122b92b..d1e9394e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,6 +38,7 @@ Docs: https://docs.openclaw.ai - Gateway/Pairing: clear persisted paired-device state when the gateway client closes with `device token mismatch` (`1008`) so reconnect flows can cleanly re-enter pairing. (#22071) Thanks @mbelinky. - Signal/Outbound: preserve case for Base64 group IDs during outbound target normalization so cross-context routing and policy checks no longer break when group IDs include uppercase characters. (#5578) Thanks @heyhudson. +- Providers/Copilot: drop persisted assistant `thinking` blocks for Claude models (while preserving turn structure/tool blocks) so follow-up requests no longer fail on invalid `thinkingSignature` payloads. (#19459) Thanks @jackheuberger. - Providers/Copilot: add `claude-sonnet-4.6` and `claude-sonnet-4.5` to the default GitHub Copilot model catalog and add coverage for model-list/definition helpers. (#20270, fixes #20091) Thanks @Clawborn. - Dependencies/Agents: bump embedded Pi SDK packages (`@mariozechner/pi-agent-core`, `@mariozechner/pi-ai`, `@mariozechner/pi-coding-agent`, `@mariozechner/pi-tui`) to `0.54.0`. (#21578) Thanks @Takhoffman. - Gateway/Config: allow `gateway.customBindHost` in strict config validation when `gateway.bind="custom"` so valid custom bind-host configurations no longer fail startup. (#20318, fixes #20289) Thanks @MisterGuy420. diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index 1eeb04636..665e98798 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -284,4 +284,172 @@ describe("sanitizeSessionHistory", () => { ), ).toBe(false); }); + + it("drops assistant thinking blocks for github-copilot models", async () => { + vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); + + const messages = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "reasoning_text", + }, + { type: "text", text: "hi" }, + ], + }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-completions", + provider: "github-copilot", + modelId: "claude-opus-4.6", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + expect(result[1]?.role).toBe("assistant"); + const assistant = result[1] as Extract; + expect(assistant.content).toEqual([{ type: "text", text: "hi" }]); + }); + + it("preserves assistant turn when all content is thinking blocks (github-copilot)", async () => { + vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); + + const messages = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "some reasoning", + thinkingSignature: "reasoning_text", + }, + ], + }, + { role: "user", content: "follow up" }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-completions", + provider: "github-copilot", + modelId: "claude-opus-4.6", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + // Assistant turn should be preserved (not dropped) to maintain turn alternation + expect(result).toHaveLength(3); + expect(result[1]?.role).toBe("assistant"); + const assistant = result[1] as Extract; + expect(assistant.content).toEqual([{ type: "text", text: "" }]); + }); + + it("preserves tool_use blocks when dropping thinking blocks (github-copilot)", async () => { + vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); + + const messages = [ + { role: "user", content: "read a file" }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "I should use the read tool", + thinkingSignature: "reasoning_text", + }, + { type: "toolCall", id: "tool_123", name: "read", arguments: { path: "/tmp/test" } }, + { type: "text", text: "Let me read that file." }, + ], + }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-completions", + provider: "github-copilot", + modelId: "claude-opus-4.6", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + expect(result[1]?.role).toBe("assistant"); + const assistant = result[1] as Extract; + const types = assistant.content.map((b: { type: string }) => b.type); + expect(types).toContain("toolCall"); + expect(types).toContain("text"); + expect(types).not.toContain("thinking"); + }); + + it("does not drop thinking blocks for non-copilot providers", async () => { + vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); + + const messages = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "some_sig", + }, + { type: "text", text: "hi" }, + ], + }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "anthropic-messages", + provider: "anthropic", + modelId: "claude-opus-4-6", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + expect(result[1]?.role).toBe("assistant"); + const assistant = result[1] as Extract; + const types = assistant.content.map((b: { type: string }) => b.type); + expect(types).toContain("thinking"); + }); + + it("does not drop thinking blocks for non-claude copilot models", async () => { + vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); + + const messages = [ + { role: "user", content: "hello" }, + { + role: "assistant", + content: [ + { + type: "thinking", + thinking: "internal", + thinkingSignature: "some_sig", + }, + { type: "text", text: "hi" }, + ], + }, + ] as unknown as AgentMessage[]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-completions", + provider: "github-copilot", + modelId: "gpt-5.2", + sessionManager: makeMockSessionManager(), + sessionId: TEST_SESSION_ID, + }); + + expect(result[1]?.role).toBe("assistant"); + const assistant = result[1] as Extract; + const types = assistant.content.map((b: { type: string }) => b.type); + expect(types).toContain("thinking"); + }); }); diff --git a/src/agents/pi-embedded-runner/google.ts b/src/agents/pi-embedded-runner/google.ts index 9a0263a2d..f9c6c2c64 100644 --- a/src/agents/pi-embedded-runner/google.ts +++ b/src/agents/pi-embedded-runner/google.ts @@ -25,6 +25,7 @@ import { import type { TranscriptPolicy } from "../transcript-policy.js"; import { resolveTranscriptPolicy } from "../transcript-policy.js"; import { log } from "./logger.js"; +import { dropThinkingBlocks } from "./thinking.js"; import { describeUnknownError } from "./utils.js"; const GOOGLE_TURN_ORDERING_CUSTOM_TYPE = "google-turn-ordering-bootstrap"; @@ -50,6 +51,7 @@ const GOOGLE_SCHEMA_UNSUPPORTED_KEYWORDS = new Set([ "minProperties", "maxProperties", ]); + const ANTIGRAVITY_SIGNATURE_RE = /^[A-Za-z0-9+/]+={0,2}$/; const INTER_SESSION_PREFIX_BASE = "[Inter-session message]"; @@ -450,9 +452,12 @@ export async function sanitizeSessionHistory(params: { ...resolveImageSanitizationLimits(params.config), }, ); - const sanitizedThinking = policy.normalizeAntigravityThinkingBlocks - ? sanitizeAntigravityThinkingBlocks(sanitizedImages) + const droppedThinking = policy.dropThinkingBlocks + ? dropThinkingBlocks(sanitizedImages) : sanitizedImages; + const sanitizedThinking = policy.sanitizeThinkingSignatures + ? sanitizeAntigravityThinkingBlocks(droppedThinking) + : droppedThinking; const sanitizedToolCalls = sanitizeToolCallInputs(sanitizedThinking); const repairedTools = policy.repairToolUseResultPairing ? sanitizeToolUseResultPairing(sanitizedToolCalls) diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index fb808d56f..382ce4b39 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -94,6 +94,7 @@ import { buildEmbeddedSystemPrompt, createSystemPromptOverride, } from "../system-prompt.js"; +import { dropThinkingBlocks } from "../thinking.js"; import { installToolResultContextGuard } from "../tool-result-context-guard.js"; import { splitSdkTools } from "../tool-split.js"; import { describeUnknownError, mapThinkingLevel } from "../utils.js"; @@ -652,6 +653,30 @@ export async function runEmbeddedAttempt( }); activeSession.agent.streamFn = cacheTrace.wrapStreamFn(activeSession.agent.streamFn); } + + // Copilot/Claude can reject persisted `thinking` blocks (e.g. thinkingSignature:"reasoning_text") + // on *any* follow-up provider call (including tool continuations). Wrap the stream function + // so every outbound request sees sanitized messages. + if (transcriptPolicy.dropThinkingBlocks) { + const inner = activeSession.agent.streamFn; + activeSession.agent.streamFn = (model, context, options) => { + const ctx = context as unknown as { messages?: unknown }; + const messages = ctx?.messages; + if (!Array.isArray(messages)) { + return inner(model, context, options); + } + const sanitized = dropThinkingBlocks(messages as unknown as AgentMessage[]) as unknown; + if (sanitized === messages) { + return inner(model, context, options); + } + const nextContext = { + ...(context as unknown as Record), + messages: sanitized, + } as unknown; + return inner(model, nextContext as typeof context, options); + }; + } + if (anthropicPayloadLogger) { activeSession.agent.streamFn = anthropicPayloadLogger.wrapStreamFn( activeSession.agent.streamFn, @@ -945,7 +970,7 @@ export async function runEmbeddedAttempt( sessionManager.resetLeaf(); } const sessionContext = sessionManager.buildSessionContext(); - const sanitizedOrphan = transcriptPolicy.normalizeAntigravityThinkingBlocks + const sanitizedOrphan = transcriptPolicy.sanitizeThinkingSignatures ? sanitizeAntigravityThinkingBlocks(sessionContext.messages) : sessionContext.messages; activeSession.agent.replaceMessages(sanitizedOrphan); diff --git a/src/agents/pi-embedded-runner/thinking.ts b/src/agents/pi-embedded-runner/thinking.ts new file mode 100644 index 000000000..5cd7ba7d4 --- /dev/null +++ b/src/agents/pi-embedded-runner/thinking.ts @@ -0,0 +1,47 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +type AssistantContentBlock = Extract["content"][number]; + +/** + * Strip all `type: "thinking"` content blocks from assistant messages. + * + * If an assistant message becomes empty after stripping, it is replaced with + * a synthetic `{ type: "text", text: "" }` block to preserve turn structure + * (some providers require strict user/assistant alternation). + * + * Returns the original array reference when nothing was changed (callers can + * use reference equality to skip downstream work). + */ +export function dropThinkingBlocks(messages: AgentMessage[]): AgentMessage[] { + let touched = false; + const out: AgentMessage[] = []; + for (const msg of messages) { + if (!msg || typeof msg !== "object" || msg.role !== "assistant") { + out.push(msg); + continue; + } + if (!Array.isArray(msg.content)) { + out.push(msg); + continue; + } + const nextContent: AssistantContentBlock[] = []; + let changed = false; + for (const block of msg.content) { + if (block && typeof block === "object" && (block as { type?: unknown }).type === "thinking") { + touched = true; + changed = true; + continue; + } + nextContent.push(block); + } + if (!changed) { + out.push(msg); + continue; + } + // Preserve the assistant turn even if all blocks were thinking-only. + const content = + nextContent.length > 0 ? nextContent : [{ type: "text", text: "" } as AssistantContentBlock]; + out.push({ ...msg, content }); + } + return touched ? out : messages; +} diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index 62ccea805..20c58a1f8 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -14,7 +14,8 @@ export type TranscriptPolicy = { allowBase64Only?: boolean; includeCamelCase?: boolean; }; - normalizeAntigravityThinkingBlocks: boolean; + sanitizeThinkingSignatures: boolean; + dropThinkingBlocks: boolean; applyGoogleTurnOrdering: boolean; validateGeminiTurns: boolean; validateAnthropicTurns: boolean; @@ -93,6 +94,13 @@ export function resolveTranscriptPolicy(params: { modelId, }); + const isCopilotClaude = provider === "github-copilot" && modelId.toLowerCase().includes("claude"); + + // GitHub Copilot's Claude endpoints can reject persisted `thinking` blocks with + // non-binary/non-base64 signatures (e.g. thinkingSignature: "reasoning_text"). + // Drop these blocks at send-time to keep sessions usable. + const dropThinkingBlocks = isCopilotClaude; + const needsNonImageSanitize = isGoogle || isAnthropic || isMistral || isOpenRouterGemini; const sanitizeToolCallIds = isGoogle || isMistral || isAnthropic; @@ -105,7 +113,7 @@ export function resolveTranscriptPolicy(params: { const sanitizeThoughtSignatures = isOpenRouterGemini ? { allowBase64Only: true, includeCamelCase: true } : undefined; - const normalizeAntigravityThinkingBlocks = isAntigravityClaudeModel; + const sanitizeThinkingSignatures = isAntigravityClaudeModel; return { sanitizeMode: isOpenAi ? "images-only" : needsNonImageSanitize ? "full" : "images-only", @@ -114,7 +122,8 @@ export function resolveTranscriptPolicy(params: { repairToolUseResultPairing: !isOpenAi && repairToolUseResultPairing, preserveSignatures: isAntigravityClaudeModel, sanitizeThoughtSignatures: isOpenAi ? undefined : sanitizeThoughtSignatures, - normalizeAntigravityThinkingBlocks, + sanitizeThinkingSignatures, + dropThinkingBlocks, applyGoogleTurnOrdering: !isOpenAi && isGoogle, validateGeminiTurns: !isOpenAi && isGoogle, validateAnthropicTurns: !isOpenAi && isAnthropic,