diff --git a/CHANGELOG.md b/CHANGELOG.md index 88de44537..e01fc8231 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Docs: https://docs.clawd.bot - Heartbeat: normalize target identifiers for consistent routing. - Telegram: use wrapped fetch for long-polling on Node to normalize AbortSignal handling. (#1639) - Exec: keep approvals for elevated ask unless full mode. (#1616) Thanks @ivancasco. +- Agents: auto-compact on context overflow prompt errors before failing. (#1627) Thanks @rodrigouroz. - Gateway: reduce log noise for late invokes + remote node probes; debounce skills refresh. (#1607) Thanks @petter-b. - macOS: default direct-transport `ws://` URLs to port 18789; document `gateway.remote.transport`. (#1603) Thanks @ngutman. diff --git a/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts b/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts index e8c514a32..874cb68b4 100644 --- a/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts +++ b/src/agents/pi-embedded-helpers.formatassistanterrortext.test.ts @@ -1,15 +1,7 @@ import type { AssistantMessage } from "@mariozechner/pi-ai"; import { describe, expect, it } from "vitest"; import { formatAssistantErrorText } from "./pi-embedded-helpers.js"; -import { DEFAULT_AGENTS_FILENAME } from "./workspace.js"; -const _makeFile = (overrides: Partial): WorkspaceBootstrapFile => ({ - name: DEFAULT_AGENTS_FILENAME, - path: "/tmp/AGENTS.md", - content: "", - missing: false, - ...overrides, -}); describe("formatAssistantErrorText", () => { const makeAssistantError = (errorMessage: string): AssistantMessage => ({ @@ -21,6 +13,16 @@ describe("formatAssistantErrorText", () => { const msg = makeAssistantError("request_too_large"); expect(formatAssistantErrorText(msg)).toContain("Context overflow"); }); + it("returns context overflow for Anthropic 'Request size exceeds model context window'", () => { + // This is the new Anthropic error format that wasn't being detected. 
+ // Without the fix, this falls through to the invalidRequest regex and returns + // "LLM request rejected: Request size exceeds model context window" + // instead of the context overflow message, preventing auto-compaction. + const msg = makeAssistantError( + '{"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}', + ); + expect(formatAssistantErrorText(msg)).toContain("Context overflow"); + }); it("returns a friendly message for Anthropic role ordering", () => { const msg = makeAssistantError('messages: roles must alternate between "user" and "assistant"'); expect(formatAssistantErrorText(msg)).toContain("Message ordering conflict"); diff --git a/src/agents/pi-embedded-helpers.iscontextoverflowerror.test.ts b/src/agents/pi-embedded-helpers.iscontextoverflowerror.test.ts index f456f4319..19165caa5 100644 --- a/src/agents/pi-embedded-helpers.iscontextoverflowerror.test.ts +++ b/src/agents/pi-embedded-helpers.iscontextoverflowerror.test.ts @@ -1,14 +1,6 @@ import { describe, expect, it } from "vitest"; import { isContextOverflowError } from "./pi-embedded-helpers.js"; -import { DEFAULT_AGENTS_FILENAME } from "./workspace.js"; -const _makeFile = (overrides: Partial): WorkspaceBootstrapFile => ({ - name: DEFAULT_AGENTS_FILENAME, - path: "/tmp/AGENTS.md", - content: "", - missing: false, - ...overrides, -}); describe("isContextOverflowError", () => { it("matches known overflow hints", () => { const samples = [ @@ -24,7 +16,34 @@ describe("isContextOverflowError", () => { expect(isContextOverflowError(sample)).toBe(true); } }); + + it("matches Anthropic 'Request size exceeds model context window' error", () => { + // Anthropic returns this error format when the prompt exceeds the context window. + // Without this fix, auto-compaction is NOT triggered because neither + // isContextOverflowError nor pi-ai's isContextOverflow recognizes this pattern. 
+ // The user sees: "LLM request rejected: Request size exceeds model context window" + // instead of automatic compaction + retry. + const anthropicRawError = + '{"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}'; + expect(isContextOverflowError(anthropicRawError)).toBe(true); + }); + + it("matches 'exceeds model context window' in various formats", () => { + const samples = [ + "Request size exceeds model context window", + "request size exceeds model context window", + '400 {"type":"error","error":{"type":"invalid_request_error","message":"Request size exceeds model context window"}}', + "The request size exceeds model context window limit", + ]; + for (const sample of samples) { + expect(isContextOverflowError(sample)).toBe(true); + } + }); + it("ignores unrelated errors", () => { expect(isContextOverflowError("rate limit exceeded")).toBe(false); + expect(isContextOverflowError("request size exceeds upload limit")).toBe(false); + expect(isContextOverflowError("model not found")).toBe(false); + expect(isContextOverflowError("authentication failed")).toBe(false); }); }); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index e8fb7a4d1..fdfed02a2 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -7,12 +7,19 @@ import type { FailoverReason } from "./types.js"; export function isContextOverflowError(errorMessage?: string): boolean { if (!errorMessage) return false; const lower = errorMessage.toLowerCase(); + const hasRequestSizeExceeds = lower.includes("request size exceeds"); + const hasContextWindow = + lower.includes("context window") || + lower.includes("context length") || + lower.includes("maximum context length"); return ( lower.includes("request_too_large") || lower.includes("request exceeds the maximum size") || lower.includes("context length exceeded") || lower.includes("maximum context length") 
|| lower.includes("prompt is too long") || + lower.includes("exceeds model context window") || + (hasRequestSizeExceeds && hasContextWindow) || lower.includes("context overflow") || (lower.includes("413") && lower.includes("too large")) ); diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 8b151a3fc..5917d53c4 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -68,7 +68,7 @@ import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../d import { describeUnknownError, mapThinkingLevel, resolveExecToolDefaults } from "./utils.js"; import { buildTtsSystemPromptHint } from "../../tts/tts.js"; -export async function compactEmbeddedPiSession(params: { +export type CompactEmbeddedPiSessionParams = { sessionId: string; sessionKey?: string; messageChannel?: string; @@ -97,354 +97,365 @@ export async function compactEmbeddedPiSession(params: { enqueue?: typeof enqueueCommand; extraSystemPrompt?: string; ownerNumbers?: string[]; -}): Promise { +}; + +/** + * Core compaction logic without lane queueing. + * Use this when already inside a session/global lane to avoid deadlocks. + */ +export async function compactEmbeddedPiSessionDirect( + params: CompactEmbeddedPiSessionParams, +): Promise { + const resolvedWorkspace = resolveUserPath(params.workspaceDir); + const prevCwd = process.cwd(); + + const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; + const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; + const agentDir = params.agentDir ?? resolveClawdbotAgentDir(); + await ensureClawdbotModelsJson(params.config, agentDir); + const { model, error, authStorage, modelRegistry } = resolveModel( + provider, + modelId, + agentDir, + params.config, + ); + if (!model) { + return { + ok: false, + compacted: false, + reason: error ?? 
`Unknown model: ${provider}/${modelId}`, + }; + } + try { + const apiKeyInfo = await getApiKeyForModel({ + model, + cfg: params.config, + agentDir, + }); + + if (!apiKeyInfo.apiKey) { + if (apiKeyInfo.mode !== "aws-sdk") { + throw new Error( + `No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`, + ); + } + } else if (model.provider === "github-copilot") { + const { resolveCopilotApiToken } = await import("../../providers/github-copilot-token.js"); + const copilotToken = await resolveCopilotApiToken({ + githubToken: apiKeyInfo.apiKey, + }); + authStorage.setRuntimeApiKey(model.provider, copilotToken.token); + } else { + authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey); + } + } catch (err) { + return { + ok: false, + compacted: false, + reason: describeUnknownError(err), + }; + } + + await fs.mkdir(resolvedWorkspace, { recursive: true }); + const sandboxSessionKey = params.sessionKey?.trim() || params.sessionId; + const sandbox = await resolveSandboxContext({ + config: params.config, + sessionKey: sandboxSessionKey, + workspaceDir: resolvedWorkspace, + }); + const effectiveWorkspace = sandbox?.enabled + ? sandbox.workspaceAccess === "rw" + ? resolvedWorkspace + : sandbox.workspaceDir + : resolvedWorkspace; + await fs.mkdir(effectiveWorkspace, { recursive: true }); + await ensureSessionHeader({ + sessionFile: params.sessionFile, + sessionId: params.sessionId, + cwd: effectiveWorkspace, + }); + + let restoreSkillEnv: (() => void) | undefined; + process.chdir(effectiveWorkspace); + try { + const shouldLoadSkillEntries = !params.skillsSnapshot || !params.skillsSnapshot.resolvedSkills; + const skillEntries = shouldLoadSkillEntries + ? loadWorkspaceSkillEntries(effectiveWorkspace) + : []; + restoreSkillEnv = params.skillsSnapshot + ? applySkillEnvOverridesFromSnapshot({ + snapshot: params.skillsSnapshot, + config: params.config, + }) + : applySkillEnvOverrides({ + skills: skillEntries ?? 
[], + config: params.config, + }); + const skillsPrompt = resolveSkillsPromptForRun({ + skillsSnapshot: params.skillsSnapshot, + entries: shouldLoadSkillEntries ? skillEntries : undefined, + config: params.config, + workspaceDir: effectiveWorkspace, + }); + + const sessionLabel = params.sessionKey ?? params.sessionId; + const { contextFiles } = await resolveBootstrapContextForRun({ + workspaceDir: effectiveWorkspace, + config: params.config, + sessionKey: params.sessionKey, + sessionId: params.sessionId, + warn: makeBootstrapWarn({ sessionLabel, warn: (message) => log.warn(message) }), + }); + const runAbortController = new AbortController(); + const toolsRaw = createClawdbotCodingTools({ + exec: { + ...resolveExecToolDefaults(params.config), + elevated: params.bashElevated, + }, + sandbox, + messageProvider: params.messageChannel ?? params.messageProvider, + agentAccountId: params.agentAccountId, + sessionKey: params.sessionKey ?? params.sessionId, + groupId: params.groupId, + groupChannel: params.groupChannel, + groupSpace: params.groupSpace, + spawnedBy: params.spawnedBy, + agentDir, + workspaceDir: effectiveWorkspace, + config: params.config, + abortSignal: runAbortController.signal, + modelProvider: model.provider, + modelId, + modelAuthMode: resolveModelAuthMode(model.provider, params.config), + }); + const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider }); + logToolSchemasForGoogle({ tools, provider }); + const machineName = await getMachineDisplayName(); + const runtimeChannel = normalizeMessageChannel(params.messageChannel ?? params.messageProvider); + let runtimeCapabilities = runtimeChannel + ? (resolveChannelCapabilities({ + cfg: params.config, + channel: runtimeChannel, + accountId: params.agentAccountId, + }) ?? []) + : undefined; + if (runtimeChannel === "telegram" && params.config) { + const inlineButtonsScope = resolveTelegramInlineButtonsScope({ + cfg: params.config, + accountId: params.agentAccountId ?? 
undefined, + }); + if (inlineButtonsScope !== "off") { + if (!runtimeCapabilities) runtimeCapabilities = []; + if ( + !runtimeCapabilities.some((cap) => String(cap).trim().toLowerCase() === "inlinebuttons") + ) { + runtimeCapabilities.push("inlineButtons"); + } + } + } + // Resolve channel-specific message actions for system prompt + const channelActions = runtimeChannel + ? listChannelSupportedActions({ + cfg: params.config, + channel: runtimeChannel, + }) + : undefined; + const messageToolHints = runtimeChannel + ? resolveChannelMessageToolHints({ + cfg: params.config, + channel: runtimeChannel, + accountId: params.agentAccountId, + }) + : undefined; + + const runtimeInfo = { + host: machineName, + os: `${os.type()} ${os.release()}`, + arch: os.arch(), + node: process.version, + model: `${provider}/${modelId}`, + channel: runtimeChannel, + capabilities: runtimeCapabilities, + channelActions, + }; + const sandboxInfo = buildEmbeddedSandboxInfo(sandbox, params.bashElevated); + const reasoningTagHint = isReasoningTagProvider(provider); + const userTimezone = resolveUserTimezone(params.config?.agents?.defaults?.userTimezone); + const userTimeFormat = resolveUserTimeFormat(params.config?.agents?.defaults?.timeFormat); + const userTime = formatUserTime(new Date(), userTimezone, userTimeFormat); + const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({ + sessionKey: params.sessionKey, + config: params.config, + }); + const isDefaultAgent = sessionAgentId === defaultAgentId; + const promptMode = isSubagentSessionKey(params.sessionKey) ? "minimal" : "full"; + const docsPath = await resolveClawdbotDocsPath({ + workspaceDir: effectiveWorkspace, + argv1: process.argv[1], + cwd: process.cwd(), + moduleUrl: import.meta.url, + }); + const ttsHint = params.config ? 
buildTtsSystemPromptHint(params.config) : undefined; + const appendPrompt = buildEmbeddedSystemPrompt({ + workspaceDir: effectiveWorkspace, + defaultThinkLevel: params.thinkLevel, + reasoningLevel: params.reasoningLevel ?? "off", + extraSystemPrompt: params.extraSystemPrompt, + ownerNumbers: params.ownerNumbers, + reasoningTagHint, + heartbeatPrompt: isDefaultAgent + ? resolveHeartbeatPrompt(params.config?.agents?.defaults?.heartbeat?.prompt) + : undefined, + skillsPrompt, + docsPath: docsPath ?? undefined, + ttsHint, + promptMode, + runtimeInfo, + messageToolHints, + sandboxInfo, + tools, + modelAliasLines: buildModelAliasLines(params.config), + userTimezone, + userTime, + userTimeFormat, + contextFiles, + }); + const systemPrompt = createSystemPromptOverride(appendPrompt); + + const sessionLock = await acquireSessionWriteLock({ + sessionFile: params.sessionFile, + }); + try { + await prewarmSessionFile(params.sessionFile); + const transcriptPolicy = resolveTranscriptPolicy({ + modelApi: model.api, + provider, + modelId, + }); + const sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), { + agentId: sessionAgentId, + sessionKey: params.sessionKey, + allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, + }); + trackSessionManagerAccess(params.sessionFile); + const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir); + ensurePiCompactionReserveTokens({ + settingsManager, + minReserveTokens: resolveCompactionReserveTokensFloor(params.config), + }); + const additionalExtensionPaths = buildEmbeddedExtensionPaths({ + cfg: params.config, + sessionManager, + provider, + modelId, + model, + }); + + const { builtInTools, customTools } = splitSdkTools({ + tools, + sandboxEnabled: !!sandbox?.enabled, + }); + + let session: Awaited>["session"]; + ({ session } = await createAgentSession({ + cwd: resolvedWorkspace, + agentDir, + authStorage, + modelRegistry, + model, + thinkingLevel: 
mapThinkingLevel(params.thinkLevel), + systemPrompt, + tools: builtInTools, + customTools, + sessionManager, + settingsManager, + skills: [], + contextFiles: [], + additionalExtensionPaths, + })); + + try { + const prior = await sanitizeSessionHistory({ + messages: session.messages, + modelApi: model.api, + modelId, + provider, + sessionManager, + sessionId: params.sessionId, + policy: transcriptPolicy, + }); + const validatedGemini = transcriptPolicy.validateGeminiTurns + ? validateGeminiTurns(prior) + : prior; + const validated = transcriptPolicy.validateAnthropicTurns + ? validateAnthropicTurns(validatedGemini) + : validatedGemini; + const limited = limitHistoryTurns( + validated, + getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), + ); + if (limited.length > 0) { + session.agent.replaceMessages(limited); + } + const result = await session.compact(params.customInstructions); + // Estimate tokens after compaction by summing token estimates for remaining messages + let tokensAfter: number | undefined; + try { + tokensAfter = 0; + for (const message of session.messages) { + tokensAfter += estimateTokens(message); + } + // Sanity check: tokensAfter should be less than tokensBefore + if (tokensAfter > result.tokensBefore) { + tokensAfter = undefined; // Don't trust the estimate + } + } catch { + // If estimation fails, leave tokensAfter undefined + tokensAfter = undefined; + } + return { + ok: true, + compacted: true, + result: { + summary: result.summary, + firstKeptEntryId: result.firstKeptEntryId, + tokensBefore: result.tokensBefore, + tokensAfter, + details: result.details, + }, + }; + } finally { + sessionManager.flushPendingToolResults?.(); + session.dispose(); + } + } finally { + await sessionLock.release(); + } + } catch (err) { + return { + ok: false, + compacted: false, + reason: describeUnknownError(err), + }; + } finally { + restoreSkillEnv?.(); + process.chdir(prevCwd); + } +} + +/** + * Compacts a session with lane queueing (session 
lane + global lane). + * Use this from outside a lane context. If already inside a lane, use + * `compactEmbeddedPiSessionDirect` to avoid deadlocks. + */ +export async function compactEmbeddedPiSession( + params: CompactEmbeddedPiSessionParams, +): Promise { const sessionLane = resolveSessionLane(params.sessionKey?.trim() || params.sessionId); const globalLane = resolveGlobalLane(params.lane); const enqueueGlobal = params.enqueue ?? ((task, opts) => enqueueCommandInLane(globalLane, task, opts)); return enqueueCommandInLane(sessionLane, () => - enqueueGlobal(async () => { - const resolvedWorkspace = resolveUserPath(params.workspaceDir); - const prevCwd = process.cwd(); - - const provider = (params.provider ?? DEFAULT_PROVIDER).trim() || DEFAULT_PROVIDER; - const modelId = (params.model ?? DEFAULT_MODEL).trim() || DEFAULT_MODEL; - const agentDir = params.agentDir ?? resolveClawdbotAgentDir(); - await ensureClawdbotModelsJson(params.config, agentDir); - const { model, error, authStorage, modelRegistry } = resolveModel( - provider, - modelId, - agentDir, - params.config, - ); - if (!model) { - return { - ok: false, - compacted: false, - reason: error ?? 
`Unknown model: ${provider}/${modelId}`, - }; - } - try { - const apiKeyInfo = await getApiKeyForModel({ - model, - cfg: params.config, - agentDir, - }); - - if (!apiKeyInfo.apiKey) { - if (apiKeyInfo.mode !== "aws-sdk") { - throw new Error( - `No API key resolved for provider "${model.provider}" (auth mode: ${apiKeyInfo.mode}).`, - ); - } - } else if (model.provider === "github-copilot") { - const { resolveCopilotApiToken } = - await import("../../providers/github-copilot-token.js"); - const copilotToken = await resolveCopilotApiToken({ - githubToken: apiKeyInfo.apiKey, - }); - authStorage.setRuntimeApiKey(model.provider, copilotToken.token); - } else { - authStorage.setRuntimeApiKey(model.provider, apiKeyInfo.apiKey); - } - } catch (err) { - return { - ok: false, - compacted: false, - reason: describeUnknownError(err), - }; - } - - await fs.mkdir(resolvedWorkspace, { recursive: true }); - const sandboxSessionKey = params.sessionKey?.trim() || params.sessionId; - const sandbox = await resolveSandboxContext({ - config: params.config, - sessionKey: sandboxSessionKey, - workspaceDir: resolvedWorkspace, - }); - const effectiveWorkspace = sandbox?.enabled - ? sandbox.workspaceAccess === "rw" - ? resolvedWorkspace - : sandbox.workspaceDir - : resolvedWorkspace; - await fs.mkdir(effectiveWorkspace, { recursive: true }); - await ensureSessionHeader({ - sessionFile: params.sessionFile, - sessionId: params.sessionId, - cwd: effectiveWorkspace, - }); - - let restoreSkillEnv: (() => void) | undefined; - process.chdir(effectiveWorkspace); - try { - const shouldLoadSkillEntries = - !params.skillsSnapshot || !params.skillsSnapshot.resolvedSkills; - const skillEntries = shouldLoadSkillEntries - ? loadWorkspaceSkillEntries(effectiveWorkspace) - : []; - restoreSkillEnv = params.skillsSnapshot - ? applySkillEnvOverridesFromSnapshot({ - snapshot: params.skillsSnapshot, - config: params.config, - }) - : applySkillEnvOverrides({ - skills: skillEntries ?? 
[], - config: params.config, - }); - const skillsPrompt = resolveSkillsPromptForRun({ - skillsSnapshot: params.skillsSnapshot, - entries: shouldLoadSkillEntries ? skillEntries : undefined, - config: params.config, - workspaceDir: effectiveWorkspace, - }); - - const sessionLabel = params.sessionKey ?? params.sessionId; - const { contextFiles } = await resolveBootstrapContextForRun({ - workspaceDir: effectiveWorkspace, - config: params.config, - sessionKey: params.sessionKey, - sessionId: params.sessionId, - warn: makeBootstrapWarn({ sessionLabel, warn: (message) => log.warn(message) }), - }); - const runAbortController = new AbortController(); - const toolsRaw = createClawdbotCodingTools({ - exec: { - ...resolveExecToolDefaults(params.config), - elevated: params.bashElevated, - }, - sandbox, - messageProvider: params.messageChannel ?? params.messageProvider, - agentAccountId: params.agentAccountId, - sessionKey: params.sessionKey ?? params.sessionId, - groupId: params.groupId, - groupChannel: params.groupChannel, - groupSpace: params.groupSpace, - spawnedBy: params.spawnedBy, - agentDir, - workspaceDir: effectiveWorkspace, - config: params.config, - abortSignal: runAbortController.signal, - modelProvider: model.provider, - modelId, - modelAuthMode: resolveModelAuthMode(model.provider, params.config), - }); - const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider }); - logToolSchemasForGoogle({ tools, provider }); - const machineName = await getMachineDisplayName(); - const runtimeChannel = normalizeMessageChannel( - params.messageChannel ?? params.messageProvider, - ); - let runtimeCapabilities = runtimeChannel - ? (resolveChannelCapabilities({ - cfg: params.config, - channel: runtimeChannel, - accountId: params.agentAccountId, - }) ?? []) - : undefined; - if (runtimeChannel === "telegram" && params.config) { - const inlineButtonsScope = resolveTelegramInlineButtonsScope({ - cfg: params.config, - accountId: params.agentAccountId ?? 
undefined, - }); - if (inlineButtonsScope !== "off") { - if (!runtimeCapabilities) runtimeCapabilities = []; - if ( - !runtimeCapabilities.some( - (cap) => String(cap).trim().toLowerCase() === "inlinebuttons", - ) - ) { - runtimeCapabilities.push("inlineButtons"); - } - } - } - // Resolve channel-specific message actions for system prompt - const channelActions = runtimeChannel - ? listChannelSupportedActions({ - cfg: params.config, - channel: runtimeChannel, - }) - : undefined; - const messageToolHints = runtimeChannel - ? resolveChannelMessageToolHints({ - cfg: params.config, - channel: runtimeChannel, - accountId: params.agentAccountId, - }) - : undefined; - - const runtimeInfo = { - host: machineName, - os: `${os.type()} ${os.release()}`, - arch: os.arch(), - node: process.version, - model: `${provider}/${modelId}`, - channel: runtimeChannel, - capabilities: runtimeCapabilities, - channelActions, - }; - const sandboxInfo = buildEmbeddedSandboxInfo(sandbox, params.bashElevated); - const reasoningTagHint = isReasoningTagProvider(provider); - const userTimezone = resolveUserTimezone(params.config?.agents?.defaults?.userTimezone); - const userTimeFormat = resolveUserTimeFormat(params.config?.agents?.defaults?.timeFormat); - const userTime = formatUserTime(new Date(), userTimezone, userTimeFormat); - const { defaultAgentId, sessionAgentId } = resolveSessionAgentIds({ - sessionKey: params.sessionKey, - config: params.config, - }); - const isDefaultAgent = sessionAgentId === defaultAgentId; - const promptMode = isSubagentSessionKey(params.sessionKey) ? "minimal" : "full"; - const docsPath = await resolveClawdbotDocsPath({ - workspaceDir: effectiveWorkspace, - argv1: process.argv[1], - cwd: process.cwd(), - moduleUrl: import.meta.url, - }); - const ttsHint = params.config ? 
buildTtsSystemPromptHint(params.config) : undefined; - const appendPrompt = buildEmbeddedSystemPrompt({ - workspaceDir: effectiveWorkspace, - defaultThinkLevel: params.thinkLevel, - reasoningLevel: params.reasoningLevel ?? "off", - extraSystemPrompt: params.extraSystemPrompt, - ownerNumbers: params.ownerNumbers, - reasoningTagHint, - heartbeatPrompt: isDefaultAgent - ? resolveHeartbeatPrompt(params.config?.agents?.defaults?.heartbeat?.prompt) - : undefined, - skillsPrompt, - docsPath: docsPath ?? undefined, - ttsHint, - promptMode, - runtimeInfo, - messageToolHints, - sandboxInfo, - tools, - modelAliasLines: buildModelAliasLines(params.config), - userTimezone, - userTime, - userTimeFormat, - contextFiles, - }); - const systemPrompt = createSystemPromptOverride(appendPrompt); - - const sessionLock = await acquireSessionWriteLock({ - sessionFile: params.sessionFile, - }); - try { - await prewarmSessionFile(params.sessionFile); - const transcriptPolicy = resolveTranscriptPolicy({ - modelApi: model.api, - provider, - modelId, - }); - const sessionManager = guardSessionManager(SessionManager.open(params.sessionFile), { - agentId: sessionAgentId, - sessionKey: params.sessionKey, - allowSyntheticToolResults: transcriptPolicy.allowSyntheticToolResults, - }); - trackSessionManagerAccess(params.sessionFile); - const settingsManager = SettingsManager.create(effectiveWorkspace, agentDir); - ensurePiCompactionReserveTokens({ - settingsManager, - minReserveTokens: resolveCompactionReserveTokensFloor(params.config), - }); - const additionalExtensionPaths = buildEmbeddedExtensionPaths({ - cfg: params.config, - sessionManager, - provider, - modelId, - model, - }); - - const { builtInTools, customTools } = splitSdkTools({ - tools, - sandboxEnabled: !!sandbox?.enabled, - }); - - let session: Awaited>["session"]; - ({ session } = await createAgentSession({ - cwd: resolvedWorkspace, - agentDir, - authStorage, - modelRegistry, - model, - thinkingLevel: 
mapThinkingLevel(params.thinkLevel), - systemPrompt, - tools: builtInTools, - customTools, - sessionManager, - settingsManager, - skills: [], - contextFiles: [], - additionalExtensionPaths, - })); - - try { - const prior = await sanitizeSessionHistory({ - messages: session.messages, - modelApi: model.api, - modelId, - provider, - sessionManager, - sessionId: params.sessionId, - policy: transcriptPolicy, - }); - const validatedGemini = transcriptPolicy.validateGeminiTurns - ? validateGeminiTurns(prior) - : prior; - const validated = transcriptPolicy.validateAnthropicTurns - ? validateAnthropicTurns(validatedGemini) - : validatedGemini; - const limited = limitHistoryTurns( - validated, - getDmHistoryLimitFromSessionKey(params.sessionKey, params.config), - ); - if (limited.length > 0) { - session.agent.replaceMessages(limited); - } - const result = await session.compact(params.customInstructions); - // Estimate tokens after compaction by summing token estimates for remaining messages - let tokensAfter: number | undefined; - try { - tokensAfter = 0; - for (const message of session.messages) { - tokensAfter += estimateTokens(message); - } - // Sanity check: tokensAfter should be less than tokensBefore - if (tokensAfter > result.tokensBefore) { - tokensAfter = undefined; // Don't trust the estimate - } - } catch { - // If estimation fails, leave tokensAfter undefined - tokensAfter = undefined; - } - return { - ok: true, - compacted: true, - result: { - summary: result.summary, - firstKeptEntryId: result.firstKeptEntryId, - tokensBefore: result.tokensBefore, - tokensAfter, - details: result.details, - }, - }; - } finally { - sessionManager.flushPendingToolResults?.(); - session.dispose(); - } - } finally { - await sessionLock.release(); - } - } catch (err) { - return { - ok: false, - compacted: false, - reason: describeUnknownError(err), - }; - } finally { - restoreSkillEnv?.(); - process.chdir(prevCwd); - } - }), + enqueueGlobal(async () => 
compactEmbeddedPiSessionDirect(params)), ); } diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts new file mode 100644 index 000000000..30b4dddf0 --- /dev/null +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -0,0 +1,281 @@ +import { describe, expect, it, vi, beforeEach } from "vitest"; + +vi.mock("./run/attempt.js", () => ({ + runEmbeddedAttempt: vi.fn(), +})); + +vi.mock("./compact.js", () => ({ + compactEmbeddedPiSessionDirect: vi.fn(), +})); + +vi.mock("./model.js", () => ({ + resolveModel: vi.fn(() => ({ + model: { + id: "test-model", + provider: "anthropic", + contextWindow: 200000, + api: "messages", + }, + error: null, + authStorage: { + setRuntimeApiKey: vi.fn(), + }, + modelRegistry: {}, + })), +})); + +vi.mock("../model-auth.js", () => ({ + ensureAuthProfileStore: vi.fn(() => ({})), + getApiKeyForModel: vi.fn(async () => ({ + apiKey: "test-key", + source: "test", + })), + resolveAuthProfileOrder: vi.fn(() => []), +})); + +vi.mock("../models-config.js", () => ({ + ensureClawdbotModelsJson: vi.fn(async () => {}), +})); + +vi.mock("../context-window-guard.js", () => ({ + CONTEXT_WINDOW_HARD_MIN_TOKENS: 1000, + CONTEXT_WINDOW_WARN_BELOW_TOKENS: 5000, + evaluateContextWindowGuard: vi.fn(() => ({ + shouldWarn: false, + shouldBlock: false, + tokens: 200000, + source: "model", + })), + resolveContextWindowInfo: vi.fn(() => ({ + tokens: 200000, + source: "model", + })), +})); + +vi.mock("../../process/command-queue.js", () => ({ + enqueueCommandInLane: vi.fn((_lane: string, task: () => unknown) => task()), +})); + +vi.mock("../../utils.js", () => ({ + resolveUserPath: vi.fn((p: string) => p), +})); + +vi.mock("../../utils/message-channel.js", () => ({ + isMarkdownCapableMessageChannel: vi.fn(() => true), +})); + +vi.mock("../agent-paths.js", () => ({ + resolveClawdbotAgentDir: vi.fn(() => "/tmp/agent-dir"), +})); + +vi.mock("../auth-profiles.js", () => ({ 
+ markAuthProfileFailure: vi.fn(async () => {}), + markAuthProfileGood: vi.fn(async () => {}), + markAuthProfileUsed: vi.fn(async () => {}), +})); + +vi.mock("../defaults.js", () => ({ + DEFAULT_CONTEXT_TOKENS: 200000, + DEFAULT_MODEL: "test-model", + DEFAULT_PROVIDER: "anthropic", +})); + +vi.mock("../failover-error.js", () => ({ + FailoverError: class extends Error { + constructor(msg: string) { + super(msg); + } + }, + resolveFailoverStatus: vi.fn(), +})); + +vi.mock("../usage.js", () => ({ + normalizeUsage: vi.fn(() => undefined), +})); + +vi.mock("./lanes.js", () => ({ + resolveSessionLane: vi.fn(() => "session-lane"), + resolveGlobalLane: vi.fn(() => "global-lane"), +})); + +vi.mock("./logger.js", () => ({ + log: { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), + }, +})); + +vi.mock("./run/payloads.js", () => ({ + buildEmbeddedRunPayloads: vi.fn(() => []), +})); + +vi.mock("./utils.js", () => ({ + describeUnknownError: vi.fn((err: unknown) => { + if (err instanceof Error) return err.message; + return String(err); + }), +})); + +vi.mock("../pi-embedded-helpers.js", async () => { + return { + isCompactionFailureError: (msg?: string) => { + if (!msg) return false; + const lower = msg.toLowerCase(); + return lower.includes("request_too_large") && lower.includes("summarization failed"); + }, + isContextOverflowError: (msg?: string) => { + if (!msg) return false; + const lower = msg.toLowerCase(); + return lower.includes("request_too_large") || lower.includes("request size exceeds"); + }, + isFailoverAssistantError: vi.fn(() => false), + isFailoverErrorMessage: vi.fn(() => false), + isAuthAssistantError: vi.fn(() => false), + isRateLimitAssistantError: vi.fn(() => false), + classifyFailoverReason: vi.fn(() => null), + formatAssistantErrorText: vi.fn(() => ""), + pickFallbackThinkingLevel: vi.fn(() => null), + isTimeoutErrorMessage: vi.fn(() => false), + parseImageDimensionError: vi.fn(() => null), + }; +}); + +import { runEmbeddedPiAgent } 
from "./run.js";
+import { runEmbeddedAttempt } from "./run/attempt.js";
+import { compactEmbeddedPiSessionDirect } from "./compact.js";
+import { log } from "./logger.js";
+
+import type { EmbeddedRunAttemptResult } from "./run/types.js";
+// Handles for programming the mocked attempt runner and the mocked compaction call.
+const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);
+const mockedCompactDirect = vi.mocked(compactEmbeddedPiSessionDirect);
+/** Builds an attempt result with no prompt error; fields in `overrides` replace the defaults. */
+function makeAttemptResult(
+  overrides: Partial<EmbeddedRunAttemptResult> = {},
+): EmbeddedRunAttemptResult {
+  return {
+    aborted: false,
+    timedOut: false,
+    promptError: null,
+    sessionIdUsed: "test-session",
+    assistantTexts: ["Hello!"],
+    toolMetas: [],
+    lastAssistant: undefined,
+    messagesSnapshot: [],
+    didSendViaMessagingTool: false,
+    messagingToolSentTexts: [],
+    messagingToolSentTargets: [],
+    cloudCodeAssistFormatError: false,
+    ...overrides, // spread last so caller-supplied fields win over the defaults
+  };
+}
+// Run parameters passed to runEmbeddedPiAgent by every test in this file.
+const baseParams = {
+  sessionId: "test-session",
+  sessionKey: "test-key",
+  sessionFile: "/tmp/session.json",
+  workspaceDir: "/tmp/workspace",
+  prompt: "hello",
+  timeoutMs: 30000,
+  runId: "run-1",
+};
+
+describe("overflow compaction in run loop", () => {
+  beforeEach(() => {
+    vi.clearAllMocks(); // clears recorded calls only; the vi.mock factory implementations stay in place
+  });
+
+  it("retries after successful compaction on context overflow promptError", async () => {
+    const overflowError = new Error("request_too_large: Request size exceeds model context window");
+    // First attempt reports the overflow; the second attempt has no prompt error.
+    mockedRunEmbeddedAttempt
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
+    // Compaction reports success.
+    mockedCompactDirect.mockResolvedValueOnce({
+      ok: true,
+      compacted: true,
+      result: {
+        summary: "Compacted session",
+        firstKeptEntryId: "entry-5",
+        tokensBefore: 150000,
+      },
+    });
+
+    const result = await runEmbeddedPiAgent(baseParams);
+
+    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    expect(log.warn).toHaveBeenCalledWith(
+      expect.stringContaining("context overflow detected; attempting auto-compaction"),
+    );
+    expect(log.info).toHaveBeenCalledWith(expect.stringContaining("auto-compaction succeeded"));
+    // Should not be an error result
+    expect(result.meta.error).toBeUndefined();
+  });
+
+  it("returns error if compaction fails", async () => {
+    const overflowError = new Error("request_too_large: Request size exceeds model context window");
+    // Every attempt resolves with the same overflow error.
+    mockedRunEmbeddedAttempt.mockResolvedValue(makeAttemptResult({ promptError: overflowError }));
+    // Compaction reports that nothing was compacted.
+    mockedCompactDirect.mockResolvedValueOnce({
+      ok: false,
+      compacted: false,
+      reason: "nothing to compact",
+    });
+
+    const result = await runEmbeddedPiAgent(baseParams);
+    // One compaction try, no retry of the attempt, and a context_overflow error payload.
+    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+    expect(result.meta.error?.kind).toBe("context_overflow");
+    expect(result.payloads?.[0]?.isError).toBe(true);
+    expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
+  });
+
+  it("returns error if overflow happens again after compaction", async () => {
+    const overflowError = new Error("request_too_large: Request size exceeds model context window");
+    // Both the initial attempt and the post-compaction retry report the overflow.
+    mockedRunEmbeddedAttempt
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
+    // Compaction itself reports success.
+    mockedCompactDirect.mockResolvedValueOnce({
+      ok: true,
+      compacted: true,
+      result: {
+        summary: "Compacted",
+        firstKeptEntryId: "entry-3",
+        tokensBefore: 180000,
+      },
+    });
+
+    const result = await runEmbeddedPiAgent(baseParams);
+
+    // Compaction attempted only once
+    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
+    // Two attempts: first overflow -> compact -> retry -> second overflow -> return error
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    expect(result.meta.error?.kind).toBe("context_overflow");
+    expect(result.payloads?.[0]?.isError).toBe(true);
+  });
+
+  it("does not attempt compaction for compaction_failure errors", async () => {
+    const 
compactionFailureError = new Error(
+      "request_too_large: summarization failed - Request size exceeds model context window",
+    );
+    // The message contains both substrings the mocked isCompactionFailureError looks for.
+    mockedRunEmbeddedAttempt.mockResolvedValue(
+      makeAttemptResult({ promptError: compactionFailureError }),
+    );
+
+    const result = await runEmbeddedPiAgent(baseParams);
+    // No compaction call, no retry, and the error kind is compaction_failure.
+    expect(mockedCompactDirect).not.toHaveBeenCalled();
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(1);
+    expect(result.meta.error?.kind).toBe("compaction_failure");
+  });
+});
diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts
index 201fb4fce..556ad3ae7 100644
--- a/src/agents/pi-embedded-runner/run.ts
+++ b/src/agents/pi-embedded-runner/run.ts
@@ -42,6 +42,7 @@ import {
 } from "../pi-embedded-helpers.js";
 import { normalizeUsage, type UsageLike } from "../usage.js";
 
+import { compactEmbeddedPiSessionDirect } from "./compact.js";
 import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
 import { log } from "./logger.js";
 import { resolveModel } from "./model.js";
@@ -290,6 +291,7 @@ export async function runEmbeddedPiAgent(
     }
   }
 
+  let overflowCompactionAttempted = false;
   try {
     while (true) {
       attemptedThinking.add(thinkLevel);
@@ -358,9 +360,41 @@ export async function runEmbeddedPiAgent(
       if (promptError && !aborted) {
         const errorText = describeUnknownError(promptError);
         if (isContextOverflowError(errorText)) {
-          const kind = isCompactionFailureError(errorText)
-            ? 
"compaction_failure" - : "context_overflow"; + const isCompactionFailure = isCompactionFailureError(errorText); + // Attempt auto-compaction on context overflow (not compaction_failure) + if (!isCompactionFailure && !overflowCompactionAttempted) { + log.warn( + `context overflow detected; attempting auto-compaction for ${provider}/${modelId}`, + ); + overflowCompactionAttempted = true; + const compactResult = await compactEmbeddedPiSessionDirect({ + sessionId: params.sessionId, + sessionKey: params.sessionKey, + messageChannel: params.messageChannel, + messageProvider: params.messageProvider, + agentAccountId: params.agentAccountId, + sessionFile: params.sessionFile, + workspaceDir: params.workspaceDir, + agentDir, + config: params.config, + skillsSnapshot: params.skillsSnapshot, + provider, + model: modelId, + thinkLevel, + reasoningLevel: params.reasoningLevel, + bashElevated: params.bashElevated, + extraSystemPrompt: params.extraSystemPrompt, + ownerNumbers: params.ownerNumbers, + }); + if (compactResult.compacted) { + log.info(`auto-compaction succeeded for ${provider}/${modelId}; retrying prompt`); + continue; + } + log.warn( + `auto-compaction failed for ${provider}/${modelId}: ${compactResult.reason ?? "nothing to compact"}`, + ); + } + const kind = isCompactionFailure ? "compaction_failure" : "context_overflow"; return { payloads: [ {