diff --git a/CHANGELOG.md b/CHANGELOG.md index c4af76dab..82a13e6bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Compaction: remove orphaned `tool_result` messages during history pruning to prevent session corruption from aborted tool calls. (#9868, fixes #9769, #9724, #9672) - Telegram: pass `parentPeer` for forum topic binding inheritance so group-level bindings apply to all topics within the group. (#9789, fixes #9545, #9351) - CLI: pass `--disable-warning=ExperimentalWarning` as a Node CLI option when respawning (avoid disallowed `NODE_OPTIONS` usage; fixes npm pack). (#9691) Thanks @18-RAJAT. - CLI: resolve bundled Chrome extension assets by walking up to the nearest assets directory; add resolver and clipboard tests. (#8914) Thanks @kelvinCB. diff --git a/src/agents/compaction.test.ts b/src/agents/compaction.test.ts index 9663b8a52..88273fb4c 100644 --- a/src/agents/compaction.test.ts +++ b/src/agents/compaction.test.ts @@ -106,6 +106,10 @@ describe("pruneHistoryForContextShare", () => { }); it("returns droppedMessagesList containing dropped messages", () => { + // Note: This test uses simple user messages with no tool calls. + // When orphaned tool_results exist, droppedMessages may exceed + // droppedMessagesList.length since orphans are counted but not + // added to the list (they lack context for summarization). 
const messages: AgentMessage[] = [ makeMessage(1, 4000), makeMessage(2, 4000), @@ -121,6 +125,7 @@ describe("pruneHistoryForContextShare", () => { }); expect(pruned.droppedChunks).toBeGreaterThan(0); + // Without orphaned tool_results, counts match exactly expect(pruned.droppedMessagesList.length).toBe(pruned.droppedMessages); // All messages accounted for: kept + dropped = original @@ -145,4 +150,144 @@ describe("pruneHistoryForContextShare", () => { expect(pruned.droppedMessagesList).toEqual([]); expect(pruned.messages.length).toBe(1); }); + + it("removes orphaned tool_result messages when tool_use is dropped", () => { + // Scenario: assistant with tool_use is in chunk 1 (dropped), + // tool_result is in chunk 2 (kept) - orphaned tool_result should be removed + // to prevent "unexpected tool_use_id" errors from Anthropic's API + const messages: AgentMessage[] = [ + // Chunk 1 (will be dropped) - contains tool_use + { + role: "assistant", + content: [ + { type: "text", text: "x".repeat(4000) }, + { type: "toolUse", id: "call_123", name: "test_tool", input: {} }, + ], + timestamp: 1, + }, + // Chunk 2 (will be kept) - contains orphaned tool_result + { + role: "toolResult", + toolCallId: "call_123", + toolName: "test_tool", + content: [{ type: "text", text: "result".repeat(500) }], + timestamp: 2, + } as AgentMessage, + { + role: "user", + content: "x".repeat(500), + timestamp: 3, + }, + ]; + + const pruned = pruneHistoryForContextShare({ + messages, + maxContextTokens: 2000, + maxHistoryShare: 0.5, + parts: 2, + }); + + // The orphaned tool_result should NOT be in kept messages + // (this is the critical invariant that prevents API errors) + const keptRoles = pruned.messages.map((m) => m.role); + expect(keptRoles).not.toContain("toolResult"); + + // The orphan count should be reflected in droppedMessages + // (orphaned tool_results are dropped but not added to droppedMessagesList + // since they lack context for summarization) + 
expect(pruned.droppedMessages).toBeGreaterThan(pruned.droppedMessagesList.length); + }); + + it("keeps tool_result when its tool_use is also kept", () => { + // Scenario: both tool_use and tool_result are in the kept portion + const messages: AgentMessage[] = [ + // Chunk 1 (will be dropped) - just user content + { + role: "user", + content: "x".repeat(4000), + timestamp: 1, + }, + // Chunk 2 (will be kept) - contains both tool_use and tool_result + { + role: "assistant", + content: [ + { type: "text", text: "y".repeat(500) }, + { type: "toolUse", id: "call_456", name: "kept_tool", input: {} }, + ], + timestamp: 2, + }, + { + role: "toolResult", + toolCallId: "call_456", + toolName: "kept_tool", + content: [{ type: "text", text: "result" }], + timestamp: 3, + } as AgentMessage, + ]; + + const pruned = pruneHistoryForContextShare({ + messages, + maxContextTokens: 2000, + maxHistoryShare: 0.5, + parts: 2, + }); + + // Both assistant and toolResult should be in kept messages + const keptRoles = pruned.messages.map((m) => m.role); + expect(keptRoles).toContain("assistant"); + expect(keptRoles).toContain("toolResult"); + }); + + it("removes multiple orphaned tool_results from the same dropped tool_use", () => { + // Scenario: assistant with multiple tool_use blocks is dropped, + // all corresponding tool_results should be removed from kept messages + const messages: AgentMessage[] = [ + // Chunk 1 (will be dropped) - contains multiple tool_use blocks + { + role: "assistant", + content: [ + { type: "text", text: "x".repeat(4000) }, + { type: "toolUse", id: "call_a", name: "tool_a", input: {} }, + { type: "toolUse", id: "call_b", name: "tool_b", input: {} }, + ], + timestamp: 1, + }, + // Chunk 2 (will be kept) - contains orphaned tool_results + { + role: "toolResult", + toolCallId: "call_a", + toolName: "tool_a", + content: [{ type: "text", text: "result_a" }], + timestamp: 2, + } as AgentMessage, + { + role: "toolResult", + toolCallId: "call_b", + toolName: "tool_b", + 
content: [{ type: "text", text: "result_b" }], + timestamp: 3, + } as AgentMessage, + { + role: "user", + content: "x".repeat(500), + timestamp: 4, + }, + ]; + + const pruned = pruneHistoryForContextShare({ + messages, + maxContextTokens: 2000, + maxHistoryShare: 0.5, + parts: 2, + }); + + // No orphaned tool_results should be in kept messages + const keptToolResults = pruned.messages.filter((m) => m.role === "toolResult"); + expect(keptToolResults).toHaveLength(0); + + // The orphan count should reflect both dropped tool_results + // droppedMessages = 1 (assistant) + 2 (orphaned tool_results) = 3 + // droppedMessagesList only has the assistant message + expect(pruned.droppedMessages).toBe(pruned.droppedMessagesList.length + 2); + }); }); diff --git a/src/agents/compaction.ts b/src/agents/compaction.ts index baa101be8..783d59b76 100644 --- a/src/agents/compaction.ts +++ b/src/agents/compaction.ts @@ -2,6 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { ExtensionContext } from "@mariozechner/pi-coding-agent"; import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent"; import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js"; +import { repairToolUseResultPairing } from "./session-transcript-repair.js"; export const BASE_CHUNK_RATIO = 0.4; export const MIN_CHUNK_RATIO = 0.15; @@ -333,11 +334,27 @@ export function pruneHistoryForContextShare(params: { break; } const [dropped, ...rest] = chunks; + const flatRest = rest.flat(); + + // After dropping a chunk, repair tool_use/tool_result pairing to handle + // orphaned tool_results (whose tool_use was in the dropped chunk). + // repairToolUseResultPairing drops orphaned tool_results, preventing + // "unexpected tool_use_id" errors from Anthropic's API. 
+ const repairReport = repairToolUseResultPairing(flatRest); + const repairedKept = repairReport.messages; + + // Track orphaned tool_results as dropped (they were in kept but their tool_use was dropped) + const orphanedCount = repairReport.droppedOrphanCount; + droppedChunks += 1; - droppedMessages += dropped.length; + droppedMessages += dropped.length + orphanedCount; droppedTokens += estimateMessagesTokens(dropped); + // Note: We don't have the actual orphaned messages to add to droppedMessagesList + // since repairToolUseResultPairing doesn't return them. This is acceptable since + // the dropped messages are used for summarization, and orphaned tool_results + // without their tool_use context aren't useful for summarization anyway. Likewise, droppedTokens counts only the dropped chunk; tokens of the removed orphaned tool_results are not included. allDroppedMessages.push(...dropped); - keptMessages = rest.flat(); + keptMessages = repairedKept; } return {