fix: recover from context overflow caused by oversized tool results (#11579)
* fix: gracefully handle oversized tool results causing context overflow When a subagent reads a very large file or gets a huge tool result (e.g., gh pr diff on a massive PR), it can exceed the model's context window in a single prompt. Auto-compaction can't help because there's no older history to compact — just one giant tool result. This adds two layers of defense: 1. Pre-emptive: Hard cap on tool result size (400K chars ≈ 100K tokens) applied in the session tool result guard before persistence. This prevents extremely large tool results from being stored in full, regardless of model context window size. 2. Recovery: When context overflow is detected and compaction fails, scan session messages for oversized tool results relative to the model's actual context window (30% max share). If found, truncate them in the session via branching (creating a new branch with truncated content) and retry the prompt. The truncation preserves the beginning of the content (most useful for understanding what was read) and appends a notice explaining the truncation and suggesting offset/limit parameters for targeted reads. Includes comprehensive tests for: - Text truncation with newline-boundary awareness - Context-window-proportional size calculation - In-memory message truncation - Oversized detection heuristics - Guard-level size capping during persistence * fix: prep fixes for tool result truncation PR (#11579) (thanks @tyler6204)
This commit is contained in:
@@ -27,6 +27,7 @@ Docs: https://docs.openclaw.ai
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
|
|
||||||
|
- Agents: recover from context overflow caused by oversized tool results (pre-emptive capping + fallback truncation). (#11579) Thanks @tyler6204.
|
||||||
- Cron: scheduler reliability (timer drift, restart catch-up, lock contention, stale running markers). (#10776) Thanks @tyler6204.
|
- Cron: scheduler reliability (timer drift, restart catch-up, lock contention, stale running markers). (#10776) Thanks @tyler6204.
|
||||||
- Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204.
|
- Cron: store migration hardening (legacy field migration, parse error handling, explicit delivery mode persistence). (#10776) Thanks @tyler6204.
|
||||||
- Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6.
|
- Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6.
|
||||||
|
|||||||
@@ -52,6 +52,10 @@ import { log } from "./logger.js";
|
|||||||
import { resolveModel } from "./model.js";
|
import { resolveModel } from "./model.js";
|
||||||
import { runEmbeddedAttempt } from "./run/attempt.js";
|
import { runEmbeddedAttempt } from "./run/attempt.js";
|
||||||
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
|
import { buildEmbeddedRunPayloads } from "./run/payloads.js";
|
||||||
|
import {
|
||||||
|
truncateOversizedToolResultsInSession,
|
||||||
|
sessionLikelyHasOversizedToolResults,
|
||||||
|
} from "./tool-result-truncation.js";
|
||||||
import { describeUnknownError } from "./utils.js";
|
import { describeUnknownError } from "./utils.js";
|
||||||
|
|
||||||
type ApiKeyInfo = ResolvedProviderAuth;
|
type ApiKeyInfo = ResolvedProviderAuth;
|
||||||
@@ -321,6 +325,7 @@ export async function runEmbeddedPiAgent(
|
|||||||
|
|
||||||
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
|
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
|
||||||
let overflowCompactionAttempts = 0;
|
let overflowCompactionAttempts = 0;
|
||||||
|
let toolResultTruncationAttempted = false;
|
||||||
try {
|
try {
|
||||||
while (true) {
|
while (true) {
|
||||||
attemptedThinking.add(thinkLevel);
|
attemptedThinking.add(thinkLevel);
|
||||||
@@ -437,6 +442,47 @@ export async function runEmbeddedPiAgent(
|
|||||||
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
|
`auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback: try truncating oversized tool results in the session.
|
||||||
|
// This handles the case where a single tool result (e.g., reading a
|
||||||
|
// huge file or getting a massive PR diff) exceeds the context window,
|
||||||
|
// and compaction can't help because there's no older history to compact.
|
||||||
|
if (!toolResultTruncationAttempted) {
|
||||||
|
const contextWindowTokens = ctxInfo.tokens;
|
||||||
|
const hasOversized = attempt.messagesSnapshot
|
||||||
|
? sessionLikelyHasOversizedToolResults({
|
||||||
|
messages: attempt.messagesSnapshot,
|
||||||
|
contextWindowTokens,
|
||||||
|
})
|
||||||
|
: false;
|
||||||
|
|
||||||
|
if (hasOversized) {
|
||||||
|
toolResultTruncationAttempted = true;
|
||||||
|
log.warn(
|
||||||
|
`[context-overflow-recovery] Attempting tool result truncation for ${provider}/${modelId} ` +
|
||||||
|
`(contextWindow=${contextWindowTokens} tokens)`,
|
||||||
|
);
|
||||||
|
const truncResult = await truncateOversizedToolResultsInSession({
|
||||||
|
sessionFile: params.sessionFile,
|
||||||
|
contextWindowTokens,
|
||||||
|
sessionId: params.sessionId,
|
||||||
|
sessionKey: params.sessionKey,
|
||||||
|
});
|
||||||
|
if (truncResult.truncated) {
|
||||||
|
log.info(
|
||||||
|
`[context-overflow-recovery] Truncated ${truncResult.truncatedCount} tool result(s); retrying prompt`,
|
||||||
|
);
|
||||||
|
// Reset compaction attempts so compaction can be tried again
|
||||||
|
// after truncation (the session is now smaller)
|
||||||
|
overflowCompactionAttempts = 0;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
log.warn(
|
||||||
|
`[context-overflow-recovery] Tool result truncation did not help: ${truncResult.reason ?? "unknown"}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
|
const kind = isCompactionFailure ? "compaction_failure" : "context_overflow";
|
||||||
return {
|
return {
|
||||||
payloads: [
|
payloads: [
|
||||||
|
|||||||
215
src/agents/pi-embedded-runner/tool-result-truncation.test.ts
Normal file
215
src/agents/pi-embedded-runner/tool-result-truncation.test.ts
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
|
import { describe, expect, it } from "vitest";
|
||||||
|
import {
|
||||||
|
truncateToolResultText,
|
||||||
|
calculateMaxToolResultChars,
|
||||||
|
truncateOversizedToolResultsInMessages,
|
||||||
|
isOversizedToolResult,
|
||||||
|
sessionLikelyHasOversizedToolResults,
|
||||||
|
HARD_MAX_TOOL_RESULT_CHARS,
|
||||||
|
} from "./tool-result-truncation.js";
|
||||||
|
|
||||||
|
function makeToolResult(text: string, toolCallId = "call_1"): AgentMessage {
|
||||||
|
return {
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId,
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text }],
|
||||||
|
isError: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
} as AgentMessage;
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeUserMessage(text: string): AgentMessage {
|
||||||
|
return {
|
||||||
|
role: "user",
|
||||||
|
content: text,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
} as AgentMessage;
|
||||||
|
}
|
||||||
|
|
||||||
|
function makeAssistantMessage(text: string): AgentMessage {
|
||||||
|
return {
|
||||||
|
role: "assistant",
|
||||||
|
content: [{ type: "text", text }],
|
||||||
|
api: "messages",
|
||||||
|
provider: "anthropic",
|
||||||
|
model: "claude-sonnet-4-20250514",
|
||||||
|
usage: {
|
||||||
|
inputTokens: 0,
|
||||||
|
outputTokens: 0,
|
||||||
|
cacheReadInputTokens: 0,
|
||||||
|
cacheCreationInputTokens: 0,
|
||||||
|
},
|
||||||
|
stopReason: "end_turn",
|
||||||
|
timestamp: Date.now(),
|
||||||
|
} as AgentMessage;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unit tests for truncateToolResultText: pass-through under the limit,
// truncation over the limit, the minimum-keep floor, and newline-aligned cuts.
describe("truncateToolResultText", () => {
  it("returns text unchanged when under limit", () => {
    const text = "hello world";
    expect(truncateToolResultText(text, 1000)).toBe(text);
  });

  it("truncates text that exceeds limit", () => {
    const text = "a".repeat(10_000);
    const result = truncateToolResultText(text, 5_000);
    expect(result.length).toBeLessThan(text.length);
    expect(result).toContain("truncated");
  });

  it("preserves at least MIN_KEEP_CHARS (2000)", () => {
    const text = "x".repeat(50_000);
    const result = truncateToolResultText(text, 100); // Even with small limit
    expect(result.length).toBeGreaterThan(2000);
  });

  it("tries to break at newline boundary", () => {
    const lines = Array.from({ length: 100 }, (_, i) => `line ${i}: ${"x".repeat(50)}`).join("\n");
    const result = truncateToolResultText(lines, 3000);
    // Should contain truncation notice
    expect(result).toContain("truncated");
    // The truncated content should be shorter than the original
    expect(result.length).toBeLessThan(lines.length);
    // The suffix marker must be present; a missing marker is a failure rather
    // than a silently skipped assertion.
    const suffixIndex = result.indexOf("\n\n⚠️");
    expect(suffixIndex).toBeGreaterThan(0);
    const keptContent = result.slice(0, suffixIndex);
    // The kept content is a prefix of the input...
    expect(lines.startsWith(keptContent)).toBe(true);
    // ...and the cut landed exactly on a line break, i.e. the next character
    // of the original is the newline that terminated the last kept line.
    expect(lines.charAt(keptContent.length)).toBe("\n");
  });
});
|
||||||
|
|
||||||
|
describe("calculateMaxToolResultChars", () => {
|
||||||
|
it("scales with context window size", () => {
|
||||||
|
const small = calculateMaxToolResultChars(32_000);
|
||||||
|
const large = calculateMaxToolResultChars(200_000);
|
||||||
|
expect(large).toBeGreaterThan(small);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("caps at HARD_MAX_TOOL_RESULT_CHARS for very large windows", () => {
|
||||||
|
const result = calculateMaxToolResultChars(2_000_000); // 2M token window
|
||||||
|
expect(result).toBeLessThanOrEqual(HARD_MAX_TOOL_RESULT_CHARS);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns reasonable size for 128K context", () => {
|
||||||
|
const result = calculateMaxToolResultChars(128_000);
|
||||||
|
// 30% of 128K = 38.4K tokens * 4 chars = 153.6K chars
|
||||||
|
expect(result).toBeGreaterThan(100_000);
|
||||||
|
expect(result).toBeLessThan(200_000);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("isOversizedToolResult", () => {
|
||||||
|
it("returns false for small tool results", () => {
|
||||||
|
const msg = makeToolResult("small content");
|
||||||
|
expect(isOversizedToolResult(msg, 200_000)).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns true for oversized tool results", () => {
|
||||||
|
const msg = makeToolResult("x".repeat(500_000));
|
||||||
|
expect(isOversizedToolResult(msg, 128_000)).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns false for non-toolResult messages", () => {
|
||||||
|
const msg = makeUserMessage("x".repeat(500_000));
|
||||||
|
expect(isOversizedToolResult(msg, 128_000)).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("truncateOversizedToolResultsInMessages", () => {
|
||||||
|
it("returns unchanged messages when nothing is oversized", () => {
|
||||||
|
const messages = [
|
||||||
|
makeUserMessage("hello"),
|
||||||
|
makeAssistantMessage("using tool"),
|
||||||
|
makeToolResult("small result"),
|
||||||
|
];
|
||||||
|
const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
|
||||||
|
messages,
|
||||||
|
200_000,
|
||||||
|
);
|
||||||
|
expect(truncatedCount).toBe(0);
|
||||||
|
expect(result).toEqual(messages);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("truncates oversized tool results", () => {
|
||||||
|
const bigContent = "x".repeat(500_000);
|
||||||
|
const messages = [
|
||||||
|
makeUserMessage("hello"),
|
||||||
|
makeAssistantMessage("reading file"),
|
||||||
|
makeToolResult(bigContent),
|
||||||
|
];
|
||||||
|
const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
|
||||||
|
messages,
|
||||||
|
128_000,
|
||||||
|
);
|
||||||
|
expect(truncatedCount).toBe(1);
|
||||||
|
const toolResult = result[2] as { content: Array<{ text: string }> };
|
||||||
|
expect(toolResult.content[0].text.length).toBeLessThan(bigContent.length);
|
||||||
|
expect(toolResult.content[0].text).toContain("truncated");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("preserves non-toolResult messages", () => {
|
||||||
|
const messages = [
|
||||||
|
makeUserMessage("hello"),
|
||||||
|
makeAssistantMessage("reading file"),
|
||||||
|
makeToolResult("x".repeat(500_000)),
|
||||||
|
];
|
||||||
|
const { messages: result } = truncateOversizedToolResultsInMessages(messages, 128_000);
|
||||||
|
expect(result[0]).toBe(messages[0]); // Same reference
|
||||||
|
expect(result[1]).toBe(messages[1]); // Same reference
|
||||||
|
});
|
||||||
|
|
||||||
|
it("handles multiple oversized tool results", () => {
|
||||||
|
const messages = [
|
||||||
|
makeUserMessage("hello"),
|
||||||
|
makeAssistantMessage("reading files"),
|
||||||
|
makeToolResult("x".repeat(500_000), "call_1"),
|
||||||
|
makeToolResult("y".repeat(500_000), "call_2"),
|
||||||
|
];
|
||||||
|
const { messages: result, truncatedCount } = truncateOversizedToolResultsInMessages(
|
||||||
|
messages,
|
||||||
|
128_000,
|
||||||
|
);
|
||||||
|
expect(truncatedCount).toBe(2);
|
||||||
|
for (const msg of result.slice(2)) {
|
||||||
|
const tr = msg as { content: Array<{ text: string }> };
|
||||||
|
expect(tr.content[0].text.length).toBeLessThan(500_000);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
describe("sessionLikelyHasOversizedToolResults", () => {
|
||||||
|
it("returns false when no tool results are oversized", () => {
|
||||||
|
const messages = [makeUserMessage("hello"), makeToolResult("small result")];
|
||||||
|
expect(
|
||||||
|
sessionLikelyHasOversizedToolResults({
|
||||||
|
messages,
|
||||||
|
contextWindowTokens: 200_000,
|
||||||
|
}),
|
||||||
|
).toBe(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns true when a tool result is oversized", () => {
|
||||||
|
const messages = [makeUserMessage("hello"), makeToolResult("x".repeat(500_000))];
|
||||||
|
expect(
|
||||||
|
sessionLikelyHasOversizedToolResults({
|
||||||
|
messages,
|
||||||
|
contextWindowTokens: 128_000,
|
||||||
|
}),
|
||||||
|
).toBe(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
it("returns false for empty messages", () => {
|
||||||
|
expect(
|
||||||
|
sessionLikelyHasOversizedToolResults({
|
||||||
|
messages: [],
|
||||||
|
contextWindowTokens: 200_000,
|
||||||
|
}),
|
||||||
|
).toBe(false);
|
||||||
|
});
|
||||||
|
});
|
||||||
328
src/agents/pi-embedded-runner/tool-result-truncation.ts
Normal file
328
src/agents/pi-embedded-runner/tool-result-truncation.ts
Normal file
@@ -0,0 +1,328 @@
|
|||||||
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
|
import type { TextContent } from "@mariozechner/pi-ai";
|
||||||
|
import { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||||
|
import { log } from "./logger.js";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum share of the context window a single tool result should occupy.
|
||||||
|
* This is intentionally conservative – a single tool result should not
|
||||||
|
* consume more than 30% of the context window even without other messages.
|
||||||
|
*/
|
||||||
|
const MAX_TOOL_RESULT_CONTEXT_SHARE = 0.3;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Hard character limit for a single tool result text block.
|
||||||
|
* Even for the largest context windows (~2M tokens), a single tool result
|
||||||
|
* should not exceed ~400K characters (~100K tokens).
|
||||||
|
* This acts as a safety net when we don't know the context window size.
|
||||||
|
*/
|
||||||
|
export const HARD_MAX_TOOL_RESULT_CHARS = 400_000;
|
||||||
|
|
||||||
|
/**
 * Minimum number of leading characters kept when truncating.
 * We always keep at least this much so the model can still tell what the
 * content was, even when the requested budget is very small.
 */
const MIN_KEEP_CHARS = 2_000;

/**
 * Notice appended to truncated tool results.
 */
const TRUNCATION_SUFFIX =
  "\n\n⚠️ [Content truncated — original was too large for the model's context window. " +
  "The content above is a partial view. If you need more, request specific sections or use " +
  "offset/limit parameters to read smaller chunks.]";

/**
 * Truncate a single text string to fit within maxChars, preserving the beginning.
 *
 * At least MIN_KEEP_CHARS characters of the original are always kept, so the
 * result may exceed `maxChars` when `maxChars` is small.
 *
 * @param text - The tool result text to truncate.
 * @param maxChars - Target size in UTF-16 code units, including the appended notice.
 * @returns `text` unchanged when it already fits (or when the minimum-keep floor
 *   covers all of it); otherwise a leading slice of `text` followed by
 *   TRUNCATION_SUFFIX.
 */
export function truncateToolResultText(text: string, maxChars: number): string {
  if (text.length <= maxChars) {
    return text;
  }
  const keepChars = Math.max(MIN_KEEP_CHARS, maxChars - TRUNCATION_SUFFIX.length);
  if (text.length <= keepChars) {
    // The minimum-keep floor already covers the whole text, so nothing would be
    // removed. Appending the notice would only grow the text and wrongly claim
    // that content was dropped.
    return text;
  }
  // Try to break at a newline boundary to avoid cutting mid-line, but only if
  // that costs less than 20% of the budget.
  let cutPoint = keepChars;
  const lastNewline = text.lastIndexOf("\n", keepChars);
  if (lastNewline > keepChars * 0.8) {
    cutPoint = lastNewline;
  } else {
    // Hard cut: do not split a UTF-16 surrogate pair, which would leave a lone
    // high surrogate (an ill-formed string) at the end of the kept content.
    const lastKeptUnit = text.charCodeAt(cutPoint - 1);
    if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
      cutPoint -= 1;
    }
  }
  return text.slice(0, cutPoint) + TRUNCATION_SUFFIX;
}
|
||||||
|
|
||||||
|
/**
 * Calculate the maximum allowed characters for a single tool result
 * based on the model's context window tokens.
 *
 * Uses a rough 4 chars ≈ 1 token heuristic; the actual ratio varies by
 * tokenizer and by content.
 *
 * @param contextWindowTokens - Size of the model's context window in tokens.
 * @returns MAX_TOOL_RESULT_CONTEXT_SHARE of the window converted to characters,
 *   never more than HARD_MAX_TOOL_RESULT_CHARS. When the window size is not a
 *   finite positive number it is treated as unknown and the hard cap is returned.
 */
export function calculateMaxToolResultChars(contextWindowTokens: number): number {
  // An unknown/invalid window must not produce NaN (every `>` comparison against
  // it is false, silently disabling detection) or a limit <= 0 (every non-empty
  // tool result would be flagged as oversized). Fall back to the hard cap.
  if (!Number.isFinite(contextWindowTokens) || contextWindowTokens <= 0) {
    return HARD_MAX_TOOL_RESULT_CHARS;
  }
  const maxTokens = Math.floor(contextWindowTokens * MAX_TOOL_RESULT_CONTEXT_SHARE);
  // Rough conversion: ~4 chars per token on average
  const maxChars = maxTokens * 4;
  return Math.min(maxChars, HARD_MAX_TOOL_RESULT_CHARS);
}
|
||||||
|
|
||||||
|
/**
 * Sum the lengths of all string `text` fields found in `type: "text"` content
 * blocks of a tool result message.
 *
 * @returns 0 when `msg` is falsy, is not a `toolResult` message, or its
 *   `content` is not an array; otherwise the combined text length.
 */
function getToolResultTextLength(msg: AgentMessage): number {
  const isToolResult = Boolean(msg) && (msg as { role?: string }).role === "toolResult";
  if (!isToolResult) {
    return 0;
  }
  const blocks = (msg as { content?: unknown }).content;
  if (!Array.isArray(blocks)) {
    return 0;
  }
  return blocks.reduce<number>((sum, block) => {
    const isTextBlock =
      block && typeof block === "object" && (block as { type?: string }).type === "text";
    if (!isTextBlock) {
      return sum;
    }
    // Only string payloads count; anything else is ignored.
    const { text } = block as TextContent;
    return typeof text === "string" ? sum + text.length : sum;
  }, 0);
}
|
||||||
|
|
||||||
|
/**
 * Produce a copy of a tool result message whose text blocks have been
 * truncated toward a combined budget of `maxChars`.
 *
 * The input message is never mutated. It is returned as-is when its `content`
 * is not an array or when its total text length is already within `maxChars`.
 *
 * NOTE(review): every text block is granted at least MIN_KEEP_CHARS, so a
 * message with many text blocks can still total more than `maxChars` after
 * truncation.
 */
function truncateToolResultMessage(msg: AgentMessage, maxChars: number): AgentMessage {
  const blocks = (msg as { content?: unknown }).content;
  if (!Array.isArray(blocks)) {
    return msg;
  }

  const totalTextChars = getToolResultTextLength(msg);
  if (totalTextChars <= maxChars) {
    return msg;
  }

  const rebuilt: unknown[] = [];
  for (const block of blocks as unknown[]) {
    const isTextBlock =
      Boolean(block) && typeof block === "object" && (block as { type?: string }).type === "text";
    if (!isTextBlock || typeof (block as TextContent).text !== "string") {
      // Non-text blocks (e.g. images) and malformed text blocks pass through untouched.
      rebuilt.push(block);
      continue;
    }
    const original = block as TextContent;
    // Each block's budget is proportional to its share of the total text,
    // with MIN_KEEP_CHARS as a floor.
    const share = original.text.length / totalTextChars;
    const budget = Math.max(MIN_KEEP_CHARS, Math.floor(maxChars * share));
    rebuilt.push({ ...original, text: truncateToolResultText(original.text, budget) });
  }

  return { ...msg, content: rebuilt } as AgentMessage;
}
|
||||||
|
|
||||||
|
/**
 * Rewrite the current branch of a session file so that oversized tool results
 * are replaced by truncated copies.
 *
 * Steps:
 * 1. Open the session and read its current branch.
 * 2. Record the index of every `toolResult` message whose text exceeds the
 *    limit derived from `contextWindowTokens`.
 * 3. Branch from the parent of the first oversized entry (or reset the leaf
 *    when that entry has no parent).
 * 4. Replay every entry from that point onwards, substituting truncated
 *    messages for the oversized ones. `branch_summary` and `label` entries are
 *    not replayed, and `session_info` entries are replayed only when they
 *    carry a name.
 *
 * NOTE(review): replayed entries presumably receive new ids, so the
 * `firstKeptEntryId` of a replayed compaction entry may still refer to an id
 * from the old branch — confirm against SessionManager.
 *
 * Any error thrown along the way is caught, logged, and reported via `reason`.
 *
 * @returns `truncated: true` with the number of truncated tool results on
 *   success; otherwise `truncated: false` with a `reason`.
 */
export async function truncateOversizedToolResultsInSession(params: {
  sessionFile: string;
  contextWindowTokens: number;
  sessionId?: string;
  sessionKey?: string;
}): Promise<{ truncated: boolean; truncatedCount: number; reason?: string }> {
  const { sessionFile, contextWindowTokens } = params;
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
  // Identifier used in every log line emitted below.
  const sessionLabel = params.sessionKey ?? params.sessionId ?? "unknown";

  try {
    const sessionManager = SessionManager.open(sessionFile);
    const branch = sessionManager.getBranch();

    if (branch.length === 0) {
      return { truncated: false, truncatedCount: 0, reason: "empty session" };
    }

    // Pass 1: locate oversized tool results on the current branch.
    const oversized = new Set<number>();
    let firstOversizedIdx = -1;
    for (const [idx, entry] of branch.entries()) {
      if (entry.type !== "message") {
        continue;
      }
      if ((entry.message as { role?: string }).role !== "toolResult") {
        continue;
      }
      const chars = getToolResultTextLength(entry.message);
      if (chars <= maxChars) {
        continue;
      }
      if (firstOversizedIdx < 0) {
        firstOversizedIdx = idx;
      }
      oversized.add(idx);
      log.info(
        `[tool-result-truncation] Found oversized tool result: ` +
          `entry=${entry.id} chars=${chars} maxChars=${maxChars} ` +
          `sessionKey=${sessionLabel}`,
      );
    }

    if (oversized.size === 0) {
      return { truncated: false, truncatedCount: 0, reason: "no oversized tool results" };
    }

    // Move the leaf to just before the first oversized entry.
    const branchFromId = branch[firstOversizedIdx].parentId;
    if (branchFromId) {
      sessionManager.branch(branchFromId);
    } else {
      // The oversized entry is the root - very unusual but handle it
      sessionManager.resetLeaf();
    }

    // Pass 2: replay the tail of the old branch onto the new one.
    let truncatedCount = 0;
    for (let idx = firstOversizedIdx; idx < branch.length; idx++) {
      const entry = branch[idx];

      switch (entry.type) {
        case "message": {
          let message = entry.message;
          if (oversized.has(idx)) {
            message = truncateToolResultMessage(message, maxChars);
            truncatedCount++;
            const newLength = getToolResultTextLength(message);
            log.info(
              `[tool-result-truncation] Truncated tool result: ` +
                `originalEntry=${entry.id} newChars=${newLength} ` +
                `sessionKey=${sessionLabel}`,
            );
          }
          // appendMessage expects Message | CustomMessage | BashExecutionMessage
          sessionManager.appendMessage(
            message as Parameters<typeof sessionManager.appendMessage>[0],
          );
          break;
        }
        case "compaction":
          sessionManager.appendCompaction(
            entry.summary,
            entry.firstKeptEntryId,
            entry.tokensBefore,
            entry.details,
            entry.fromHook,
          );
          break;
        case "thinking_level_change":
          sessionManager.appendThinkingLevelChange(entry.thinkingLevel);
          break;
        case "model_change":
          sessionManager.appendModelChange(entry.provider, entry.modelId);
          break;
        case "custom":
          sessionManager.appendCustomEntry(entry.customType, entry.data);
          break;
        case "custom_message":
          sessionManager.appendCustomMessageEntry(
            entry.customType,
            entry.content,
            entry.display,
            entry.details,
          );
          break;
        case "branch_summary":
        case "label":
          // These reference specific entry IDs - skip to avoid inconsistency
          break;
        case "session_info":
          if (entry.name) {
            sessionManager.appendSessionInfo(entry.name);
          }
          break;
        default:
          break;
      }
    }

    log.info(
      `[tool-result-truncation] Truncated ${truncatedCount} tool result(s) in session ` +
        `(contextWindow=${contextWindowTokens} maxChars=${maxChars}) ` +
        `sessionKey=${sessionLabel}`,
    );

    return { truncated: true, truncatedCount };
  } catch (err) {
    const errMsg = err instanceof Error ? err.message : String(err);
    log.warn(`[tool-result-truncation] Failed to truncate: ${errMsg}`);
    return { truncated: false, truncatedCount: 0, reason: errMsg };
  }
}
|
||||||
|
|
||||||
|
/**
 * Truncate oversized tool results in an in-memory list of messages.
 *
 * The input array and its messages are left untouched. Messages that need no
 * truncation are carried over by reference; oversized tool results are
 * replaced by truncated copies.
 *
 * @param messages - Messages to inspect.
 * @param contextWindowTokens - Context window size used to derive the per-result limit.
 * @returns The new message list and how many tool results were over the limit.
 */
export function truncateOversizedToolResultsInMessages(
  messages: AgentMessage[],
  contextWindowTokens: number,
): { messages: AgentMessage[]; truncatedCount: number } {
  const maxChars = calculateMaxToolResultChars(contextWindowTokens);
  const output: AgentMessage[] = [];
  let truncatedCount = 0;

  for (const msg of messages) {
    const isToolResult = (msg as { role?: string }).role === "toolResult";
    if (isToolResult && getToolResultTextLength(msg) > maxChars) {
      truncatedCount++;
      output.push(truncateToolResultMessage(msg, maxChars));
    } else {
      output.push(msg);
    }
  }

  return { messages: output, truncatedCount };
}
|
||||||
|
|
||||||
|
/**
 * Report whether `msg` is a tool result whose combined text length exceeds
 * the per-result limit derived from `contextWindowTokens`.
 *
 * Messages with any other role are never considered oversized.
 */
export function isOversizedToolResult(msg: AgentMessage, contextWindowTokens: number): boolean {
  const role = (msg as { role?: string }).role;
  return (
    role === "toolResult" &&
    getToolResultTextLength(msg) > calculateMaxToolResultChars(contextWindowTokens)
  );
}
|
||||||
|
|
||||||
|
/**
 * Heuristic check for whether any tool result in `messages` is larger than the
 * per-result limit for the given context window. Callers use it to decide
 * whether attempting tool result truncation is worthwhile.
 *
 * @returns `true` as soon as one oversized tool result is found; `false` when
 *   there is none (including for an empty list).
 */
export function sessionLikelyHasOversizedToolResults(params: {
  messages: AgentMessage[];
  contextWindowTokens: number;
}): boolean {
  const limit = calculateMaxToolResultChars(params.contextWindowTokens);
  return params.messages.some(
    (candidate) =>
      (candidate as { role?: string }).role === "toolResult" &&
      getToolResultTextLength(candidate) > limit,
  );
}
|
||||||
@@ -206,4 +206,67 @@ describe("installSessionToolResultGuard", () => {
|
|||||||
|
|
||||||
expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult"]);
|
expect(messages.map((m) => m.role)).toEqual(["assistant", "toolResult"]);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("caps oversized tool result text during persistence", () => {
|
||||||
|
const sm = SessionManager.inMemory();
|
||||||
|
installSessionToolResultGuard(sm);
|
||||||
|
|
||||||
|
sm.appendMessage(toolCallMessage);
|
||||||
|
sm.appendMessage(
|
||||||
|
asAppendMessage({
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: "x".repeat(500_000) }],
|
||||||
|
isError: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const entries = sm
|
||||||
|
.getEntries()
|
||||||
|
.filter((e) => e.type === "message")
|
||||||
|
.map((e) => (e as { message: AgentMessage }).message);
|
||||||
|
|
||||||
|
const toolResult = entries.find((m) => m.role === "toolResult") as {
|
||||||
|
content: Array<{ type: string; text: string }>;
|
||||||
|
};
|
||||||
|
expect(toolResult).toBeDefined();
|
||||||
|
const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
expect(textBlock.text.length).toBeLessThan(500_000);
|
||||||
|
expect(textBlock.text).toContain("truncated");
|
||||||
|
});
|
||||||
|
|
||||||
|
it("does not truncate tool results under the limit", () => {
|
||||||
|
const sm = SessionManager.inMemory();
|
||||||
|
installSessionToolResultGuard(sm);
|
||||||
|
|
||||||
|
const originalText = "small tool result";
|
||||||
|
sm.appendMessage(toolCallMessage);
|
||||||
|
sm.appendMessage(
|
||||||
|
asAppendMessage({
|
||||||
|
role: "toolResult",
|
||||||
|
toolCallId: "call_1",
|
||||||
|
toolName: "read",
|
||||||
|
content: [{ type: "text", text: originalText }],
|
||||||
|
isError: false,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
}),
|
||||||
|
);
|
||||||
|
|
||||||
|
const entries = sm
|
||||||
|
.getEntries()
|
||||||
|
.filter((e) => e.type === "message")
|
||||||
|
.map((e) => (e as { message: AgentMessage }).message);
|
||||||
|
|
||||||
|
const toolResult = entries.find((m) => m.role === "toolResult") as {
|
||||||
|
content: Array<{ type: string; text: string }>;
|
||||||
|
};
|
||||||
|
const textBlock = toolResult.content.find((b: { type: string }) => b.type === "text") as {
|
||||||
|
text: string;
|
||||||
|
};
|
||||||
|
expect(textBlock.text).toBe(originalText);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -1,8 +1,76 @@
|
|||||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||||
|
import type { TextContent } from "@mariozechner/pi-ai";
|
||||||
import type { SessionManager } from "@mariozechner/pi-coding-agent";
|
import type { SessionManager } from "@mariozechner/pi-coding-agent";
|
||||||
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
|
import { emitSessionTranscriptUpdate } from "../sessions/transcript-events.js";
|
||||||
|
import { HARD_MAX_TOOL_RESULT_CHARS } from "./pi-embedded-runner/tool-result-truncation.js";
|
||||||
import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";
|
import { makeMissingToolResult, sanitizeToolCallInputs } from "./session-transcript-repair.js";
|
||||||
|
|
||||||
|
/**
 * Notice appended to a tool-result text block after the guard has shortened
 * it, telling the reader that content was dropped and how to fetch less.
 */
const GUARD_TRUNCATION_SUFFIX = [
  "\n\n⚠️ [Content truncated during persistence — original exceeded size limit. ",
  "Use offset/limit parameters or request specific sections for large content.]",
].join("");
|
||||||
|
|
||||||
|
/**
 * Cap the combined text size of a tool result message.
 *
 * The message is returned unchanged (same object) when its role is not
 * "toolResult", when its content is not an array, or when its text blocks
 * total at most HARD_MAX_TOOL_RESULT_CHARS characters. Otherwise a shallow
 * copy is returned in which every oversized text block is cut down to its
 * proportional share of the cap and suffixed with GUARD_TRUNCATION_SUFFIX.
 * Non-text blocks are passed through untouched.
 *
 * NOTE(review): each block is granted a floor of 2_000 characters, so a
 * result made of many small text blocks can still exceed the cap in
 * aggregate — confirm whether that is acceptable.
 *
 * @param msg - Message about to be handed to the session for persistence.
 * @returns The original message, or a copy with truncated text blocks.
 */
function capToolResultSize(msg: AgentMessage): AgentMessage {
  const role = (msg as { role?: string }).role;
  if (role !== "toolResult") {
    return msg;
  }
  const content = (msg as { content?: unknown }).content;
  if (!Array.isArray(content)) {
    return msg;
  }

  // A block only counts as text when it is tagged "text" AND carries a string.
  const isTextBlock = (block: unknown): block is TextContent =>
    !!block &&
    typeof block === "object" &&
    (block as { type?: string }).type === "text" &&
    typeof (block as { text?: unknown }).text === "string";

  // Calculate total text size across all text blocks.
  let totalTextChars = 0;
  for (const block of content) {
    if (isTextBlock(block)) {
      totalTextChars += block.text.length;
    }
  }

  if (totalTextChars <= HARD_MAX_TOOL_RESULT_CHARS) {
    return msg;
  }

  // Truncate proportionally: each text block gets a share of the cap that
  // matches its share of the total text, minus room for the suffix.
  const newContent = content.map((block: unknown) => {
    if (!isTextBlock(block)) {
      return block;
    }
    const { text } = block;
    const blockShare = text.length / totalTextChars;
    const blockBudget = Math.max(
      2_000,
      Math.floor(HARD_MAX_TOOL_RESULT_CHARS * blockShare) - GUARD_TRUNCATION_SUFFIX.length,
    );
    if (text.length <= blockBudget) {
      return block;
    }

    // Prefer cutting at a newline, but only if that keeps at least 80% of the budget.
    let cutPoint = blockBudget;
    const lastNewline = text.lastIndexOf("\n", blockBudget);
    if (lastNewline > blockBudget * 0.8) {
      cutPoint = lastNewline;
    } else {
      // Never cut between the two halves of a UTF-16 surrogate pair: that would
      // leave a lone high surrogate (ill-formed text) right before the suffix.
      const before = text.charCodeAt(cutPoint - 1);
      const after = text.charCodeAt(cutPoint);
      const splitsPair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      if (splitsPair) {
        cutPoint -= 1;
      }
    }

    return {
      ...block,
      text: text.slice(0, cutPoint) + GUARD_TRUNCATION_SUFFIX,
    };
  });

  return { ...msg, content: newContent } as AgentMessage;
}
|
||||||
|
|
||||||
type ToolCall = { id: string; name?: string };
|
type ToolCall = { id: string; name?: string };
|
||||||
|
|
||||||
function extractAssistantToolCalls(msg: Extract<AgentMessage, { role: "assistant" }>): ToolCall[] {
|
function extractAssistantToolCalls(msg: Extract<AgentMessage, { role: "assistant" }>): ToolCall[] {
|
||||||
@@ -116,8 +184,11 @@ export function installSessionToolResultGuard(
|
|||||||
if (id) {
|
if (id) {
|
||||||
pending.delete(id);
|
pending.delete(id);
|
||||||
}
|
}
|
||||||
|
// Apply hard size cap before persistence to prevent oversized tool results
|
||||||
|
// from consuming the entire context window on subsequent LLM calls.
|
||||||
|
const capped = capToolResultSize(nextMessage);
|
||||||
return originalAppend(
|
return originalAppend(
|
||||||
persistToolResult(nextMessage, {
|
persistToolResult(capped, {
|
||||||
toolCallId: id ?? undefined,
|
toolCallId: id ?? undefined,
|
||||||
toolName,
|
toolName,
|
||||||
isSynthetic: false,
|
isSynthetic: false,
|
||||||
|
|||||||
Reference in New Issue
Block a user