diff --git a/src/agents/compaction.ts b/src/agents/compaction.ts index d60d1af2a..80021e7ad 100644 --- a/src/agents/compaction.ts +++ b/src/agents/compaction.ts @@ -68,6 +68,11 @@ export function splitMessagesByTokenShare( return chunks; } +// Overhead reserved for summarization prompt, system prompt, previous summary, +// and serialization wrappers (tags, instructions, etc.). +// generateSummary uses reasoning: "high" which also consumes context budget. +export const SUMMARIZATION_OVERHEAD_TOKENS = 4096; + export function chunkMessagesByMaxTokens( messages: AgentMessage[], maxTokens: number, @@ -76,13 +81,17 @@ return []; } + // Apply safety margin to compensate for estimateTokens() underestimation + // (chars/4 heuristic misses multi-byte chars, special tokens, code tokens, etc.) + const effectiveMax = Math.max(1, Math.floor(maxTokens / SAFETY_MARGIN)); + const chunks: AgentMessage[][] = []; let currentChunk: AgentMessage[] = []; let currentTokens = 0; for (const message of messages) { const messageTokens = estimateTokens(message); - if (currentChunk.length > 0 && currentTokens + messageTokens > maxTokens) { + if (currentChunk.length > 0 && currentTokens + messageTokens > effectiveMax) { chunks.push(currentChunk); currentChunk = []; currentTokens = 0; @@ -91,7 +100,7 @@ currentChunk.push(message); currentTokens += messageTokens; - if (messageTokens > maxTokens) { + if (messageTokens > effectiveMax) { // Split oversized messages to avoid unbounded chunk growth. 
chunks.push(currentChunk); currentChunk = []; diff --git a/src/agents/pi-extensions/compaction-safeguard.ts b/src/agents/pi-extensions/compaction-safeguard.ts index 12c6627e4..ed0f0434c 100644 --- a/src/agents/pi-extensions/compaction-safeguard.ts +++ b/src/agents/pi-extensions/compaction-safeguard.ts @@ -7,6 +7,7 @@ import { BASE_CHUNK_RATIO, MIN_CHUNK_RATIO, SAFETY_MARGIN, + SUMMARIZATION_OVERHEAD_TOKENS, computeAdaptiveChunkRatio, estimateMessagesTokens, isOversizedForSummary, @@ -268,7 +269,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void { ); const droppedMaxChunkTokens = Math.max( 1, - Math.floor(contextWindowTokens * droppedChunkRatio), + Math.floor(contextWindowTokens * droppedChunkRatio) - + SUMMARIZATION_OVERHEAD_TOKENS, ); droppedSummary = await summarizeInStages({ messages: pruned.droppedMessagesList, @@ -293,10 +295,15 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void { } } - // Use adaptive chunk ratio based on message sizes + // Use adaptive chunk ratio based on message sizes, reserving headroom for + // the summarization prompt, system prompt, previous summary, and reasoning budget + // that generateSummary adds on top of the serialized conversation chunk. const allMessages = [...messagesToSummarize, ...turnPrefixMessages]; const adaptiveRatio = computeAdaptiveChunkRatio(allMessages, contextWindowTokens); - const maxChunkTokens = Math.max(1, Math.floor(contextWindowTokens * adaptiveRatio)); + const maxChunkTokens = Math.max( + 1, + Math.floor(contextWindowTokens * adaptiveRatio) - SUMMARIZATION_OVERHEAD_TOKENS, + ); const reserveTokens = Math.max(1, Math.floor(preparation.settings.reserveTokens)); // Feed dropped-messages summary as previousSummary so the main summarization