From 68ea063958eebb8d888298faed789ff98166b1e0 Mon Sep 17 00:00:00 2001 From: the sun gif man Date: Sun, 15 Feb 2026 22:45:01 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20preserve=20openai=20reaso?= =?UTF-8?q?ning=20replay=20ids=20(#17792)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit What: - disable tool-call id sanitization for OpenAI/OpenAI Codex transcript policy - gate id sanitization in image sanitizer to full mode only - keep orphan reasoning downgrade scoped to OpenAI model-switch replay path - update transcript policy, session-history, sanitizer, and reasoning replay tests - document OpenAI model-switch orphan-reasoning cleanup behavior in transcript hygiene reference Why: - OpenAI Responses replay depends on canonical call_id|fc_id pairings for reasoning followers - strict id rewriting in OpenAI path breaks follower matching and triggers rs_* orphan 400s - limiting scope avoids behavior expansion while fixing the identified regression Tests: - pnpm vitest run src/agents/transcript-policy.test.ts src/agents/pi-embedded-runner.sanitize-session-history.test.ts src/agents/openai-responses.reasoning-replay.test.ts - pnpm vitest run --config vitest.e2e.config.ts src/agents/transcript-policy.e2e.test.ts src/agents/pi-embedded-runner.sanitize-session-history.e2e.test.ts src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.e2e.test.ts src/agents/pi-embedded-helpers.sanitizeuserfacingtext.e2e.test.ts - pnpm lint - pnpm format:check - pnpm check:docs - pnpm test (fails in current macOS bash 3.2 env at test/git-hooks-pre-commit.integration.test.ts: mapfile not found) --- .../openai-responses.reasoning-replay.test.ts | 9 ++++ ...tant-text-blocks-but-preserves.e2e.test.ts | 29 +++++++++++ ...helpers.sanitizeuserfacingtext.e2e.test.ts | 21 ++++++++ src/agents/pi-embedded-helpers/images.ts | 7 +-- ...er.openai-tool-id-preservation.e2e.test.ts | 50 +++++++++++++++++++ ...unner.sanitize-session-history.e2e.test.ts | 5 +- ...ed-runner.sanitize-session-history.test.ts | 10 ++-- src/agents/transcript-policy.e2e.test.ts | 6 +-- src/agents/transcript-policy.test.ts | 6 +-- src/agents/transcript-policy.ts | 4 +- 10 files changed, 126 insertions(+), 21 deletions(-) create mode 100644 src/agents/pi-embedded-runner.openai-tool-id-preservation.e2e.test.ts diff --git a/src/agents/openai-responses.reasoning-replay.test.ts b/src/agents/openai-responses.reasoning-replay.test.ts index 2a94db7e3..0ef535d92 100644 --- a/src/agents/openai-responses.reasoning-replay.test.ts +++ b/src/agents/openai-responses.reasoning-replay.test.ts @@ -115,6 +115,15 @@ describe("openai-responses reasoning replay", () => { expect(types).toContain("reasoning"); expect(types).toContain("function_call"); expect(types.indexOf("reasoning")).toBeLessThan(types.indexOf("function_call")); + + const functionCall = input.find( + (item) => + item && + typeof item === "object" && + (item as Record).type === "function_call", + ) as Record | undefined; + expect(functionCall?.call_id).toBe("call_123"); + expect(functionCall?.id).toBe("fc_123"); }); it("still replays reasoning when paired with an assistant message", async () => { diff --git a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.e2e.test.ts b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.e2e.test.ts index 27accfaad..4770a4b4d 100644 --- a/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.e2e.test.ts +++ b/src/agents/pi-embedded-helpers.sanitize-session-messages-images.removes-empty-assistant-text-blocks-but-preserves.e2e.test.ts @@ -160,6 +160,35 @@ describe("sanitizeSessionMessagesImages", () => { const toolResult = out[1] as { toolUseId?: string }; expect(toolResult.toolUseId).toBe("callabcitem123"); }); + + it("does not sanitize tool IDs in images-only mode", async () => { + const input = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_123|fc_456", name: "read", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_456", + toolName: "read", + content: [{ type: "text", text: "ok" }], + isError: false, + }, + ] satisfies AgentMessage[]; + + const out = await sanitizeSessionMessagesImages(input, "test", { + sanitizeMode: "images-only", + sanitizeToolCallIds: true, + toolCallIdMode: "strict", + }); + + const assistant = out[0] as unknown as { content?: Array<{ type?: string; id?: string }> }; + const toolCall = assistant.content?.find((b) => b.type === "toolCall"); + expect(toolCall?.id).toBe("call_123|fc_456"); + + const toolResult = out[1] as unknown as { toolCallId?: string }; + expect(toolResult.toolCallId).toBe("call_123|fc_456"); + }); it("filters whitespace-only assistant text blocks", async () => { const input = [ { diff --git a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.e2e.test.ts b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.e2e.test.ts index 5d87a339d..4d9817f59 100644 --- a/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.e2e.test.ts +++ b/src/agents/pi-embedded-helpers.sanitizeuserfacingtext.e2e.test.ts @@ -294,6 +294,27 @@ describe("downgradeOpenAIReasoningBlocks", () => { // oxlint-disable-next-line typescript/no-explicit-any expect(downgradeOpenAIReasoningBlocks(input as any)).toEqual(input); }); + + it("is idempotent for orphaned reasoning cleanup", () => { + const input = [ + { + role: "assistant", + content: [ + { + type: "thinking", + thinkingSignature: JSON.stringify({ id: "rs_orphan", type: "reasoning" }), + }, + ], + }, + { role: "user", content: "next" }, + ]; + + // oxlint-disable-next-line typescript/no-explicit-any + const once = downgradeOpenAIReasoningBlocks(input as any); + // oxlint-disable-next-line typescript/no-explicit-any + const twice = downgradeOpenAIReasoningBlocks(once as any); + expect(twice).toEqual(once); + }); }); describe("normalizeTextForComparison", () => { diff --git a/src/agents/pi-embedded-helpers/images.ts b/src/agents/pi-embedded-helpers/images.ts index 3af4dd0a6..9162bb812 100644 --- a/src/agents/pi-embedded-helpers/images.ts +++ b/src/agents/pi-embedded-helpers/images.ts @@ -51,9 +51,10 @@ export async function sanitizeSessionMessagesImages( const allowNonImageSanitization = sanitizeMode === "full"; // We sanitize historical session messages because Anthropic can reject a request // if the transcript contains oversized base64 images (see MAX_IMAGE_DIMENSION_PX). - const sanitizedIds = options?.sanitizeToolCallIds - ? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode) - : messages; + const sanitizedIds = + allowNonImageSanitization && options?.sanitizeToolCallIds + ? sanitizeToolCallIdsForCloudCodeAssist(messages, options.toolCallIdMode) + : messages; const out: AgentMessage[] = []; for (const msg of sanitizedIds) { if (!msg || typeof msg !== "object") { diff --git a/src/agents/pi-embedded-runner.openai-tool-id-preservation.e2e.test.ts b/src/agents/pi-embedded-runner.openai-tool-id-preservation.e2e.test.ts new file mode 100644 index 000000000..115d0a22b --- /dev/null +++ b/src/agents/pi-embedded-runner.openai-tool-id-preservation.e2e.test.ts @@ -0,0 +1,50 @@ +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { describe, expect, it } from "vitest"; +import { + makeInMemorySessionManager, + makeModelSnapshotEntry, +} from "./pi-embedded-runner.sanitize-session-history.test-harness.js"; +import { sanitizeSessionHistory } from "./pi-embedded-runner/google.js"; + +describe("sanitizeSessionHistory openai tool id preservation", () => { + it("keeps canonical call_id|fc_id pairings for same-model openai replay", async () => { + const sessionEntries = [ + makeModelSnapshotEntry({ + provider: "openai", + modelApi: "openai-responses", + modelId: "gpt-5.2-codex", + }), + ]; + const sessionManager = makeInMemorySessionManager(sessionEntries); + + const messages: AgentMessage[] = [ + { + role: "assistant", + content: [{ type: "toolCall", id: "call_123|fc_123", name: "noop", arguments: {} }], + }, + { + role: "toolResult", + toolCallId: "call_123|fc_123", + toolName: "noop", + content: [{ type: "text", text: "ok" }], + isError: false, + } as unknown as AgentMessage, + ]; + + const result = await sanitizeSessionHistory({ + messages, + modelApi: "openai-responses", + provider: "openai", + modelId: "gpt-5.2-codex", + sessionManager, + sessionId: "test-session", + }); + + const assistant = result[0] as { content?: Array<{ type?: string; id?: string }> }; + const toolCall = assistant.content?.find((block) => block.type === "toolCall"); + expect(toolCall?.id).toBe("call_123|fc_123"); + + const toolResult = result[1] as { toolCallId?: string }; + expect(toolResult.toolCallId).toBe("call_123|fc_123"); + }); +}); diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.e2e.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.e2e.test.ts index c3f581006..d4f4488b6 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.e2e.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.e2e.test.ts @@ -52,7 +52,7 @@ describe("sanitizeSessionHistory e2e smoke", () => { ); }); - it("applies strict tool-call sanitization for openai-responses", async () => { + it("keeps images-only sanitize policy without tool-call id rewriting for openai-responses", async () => { vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); await sanitizeSessionHistory({ @@ -68,8 +68,7 @@ describe("sanitizeSessionHistory e2e smoke", () => { "session:history", expect.objectContaining({ sanitizeMode: "images-only", - sanitizeToolCallIds: true, - toolCallIdMode: "strict", + sanitizeToolCallIds: false, }), ); }); diff --git a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts index a36c5ba0b..ca463a2e3 100644 --- a/src/agents/pi-embedded-runner.sanitize-session-history.test.ts +++ b/src/agents/pi-embedded-runner.sanitize-session-history.test.ts @@ -98,7 +98,7 @@ describe("sanitizeSessionHistory", () => { ); }); - it("sanitizes tool call ids for openai-responses while keeping images-only mode", async () => { + it("does not sanitize tool call ids for openai-responses", async () => { vi.mocked(helpers.isGoogleModelApi).mockReturnValue(false); await sanitizeSessionHistory({ @@ -112,11 +112,7 @@ describe("sanitizeSessionHistory", () => { expect(helpers.sanitizeSessionMessagesImages).toHaveBeenCalledWith( mockMessages, "session:history", - expect.objectContaining({ - sanitizeMode: "images-only", - sanitizeToolCallIds: true, - toolCallIdMode: "strict", - }), + expect.objectContaining({ sanitizeMode: "images-only", sanitizeToolCallIds: false }), ); }); @@ -243,7 +239,7 @@ describe("sanitizeSessionHistory", () => { expect(result).toEqual(messages); }); - it("downgrades openai reasoning only when the model changes", async () => { + it("downgrades openai reasoning when the model changes", async () => { const sessionEntries = [ makeModelSnapshotEntry({ provider: "anthropic", diff --git a/src/agents/transcript-policy.e2e.test.ts b/src/agents/transcript-policy.e2e.test.ts index 669f69384..58f23e21c 100644 --- a/src/agents/transcript-policy.e2e.test.ts +++ b/src/agents/transcript-policy.e2e.test.ts @@ -2,15 +2,15 @@ import { describe, expect, it } from "vitest"; import { resolveTranscriptPolicy } from "./transcript-policy.js"; describe("resolveTranscriptPolicy e2e smoke", () => { - it("uses strict tool-call sanitization for OpenAI models", () => { + it("uses images-only sanitization without tool-call id rewriting for OpenAI models", () => { const policy = resolveTranscriptPolicy({ provider: "openai", modelId: "gpt-4o", modelApi: "openai", }); expect(policy.sanitizeMode).toBe("images-only"); - expect(policy.sanitizeToolCallIds).toBe(true); - expect(policy.toolCallIdMode).toBe("strict"); + expect(policy.sanitizeToolCallIds).toBe(false); + expect(policy.toolCallIdMode).toBeUndefined(); }); it("uses strict9 tool-call sanitization for Mistral-family models", () => { diff --git a/src/agents/transcript-policy.test.ts b/src/agents/transcript-policy.test.ts index 6ae7883db..56c1230b6 100644 --- a/src/agents/transcript-policy.test.ts +++ b/src/agents/transcript-policy.test.ts @@ -30,13 +30,13 @@ describe("resolveTranscriptPolicy", () => { expect(policy.toolCallIdMode).toBe("strict9"); }); - it("enables sanitizeToolCallIds for OpenAI provider", () => { + it("disables sanitizeToolCallIds for OpenAI provider", () => { const policy = resolveTranscriptPolicy({ provider: "openai", modelId: "gpt-4o", modelApi: "openai", }); - expect(policy.sanitizeToolCallIds).toBe(true); - expect(policy.toolCallIdMode).toBe("strict"); + expect(policy.sanitizeToolCallIds).toBe(false); + expect(policy.toolCallIdMode).toBeUndefined(); }); }); diff --git a/src/agents/transcript-policy.ts b/src/agents/transcript-policy.ts index e25ea5545..22e173320 100644 --- a/src/agents/transcript-policy.ts +++ b/src/agents/transcript-policy.ts @@ -95,7 +95,7 @@ export function resolveTranscriptPolicy(params: { const needsNonImageSanitize = isGoogle || isAnthropic || isMistral || isOpenRouterGemini; - const sanitizeToolCallIds = isGoogle || isMistral || isAnthropic || isOpenAi; + const sanitizeToolCallIds = isGoogle || isMistral || isAnthropic; const toolCallIdMode: ToolCallIdMode | undefined = isMistral ? "strict9" : sanitizeToolCallIds @@ -109,7 +109,7 @@ export function resolveTranscriptPolicy(params: { return { sanitizeMode: isOpenAi ? "images-only" : needsNonImageSanitize ? "full" : "images-only", - sanitizeToolCallIds, + sanitizeToolCallIds: !isOpenAi && sanitizeToolCallIds, toolCallIdMode, repairToolUseResultPairing: !isOpenAi && repairToolUseResultPairing, preserveSignatures: isAntigravityClaudeModel,