From 8a20f5146047d66c2d384c6a5d5fb646fbcc8249 Mon Sep 17 00:00:00 2001 From: gambletan Date: Sun, 8 Mar 2026 18:03:33 +0800 Subject: [PATCH] fix: add rate limit patterns for 'too many tokens' and 'tokens per day' (#39377) Merged via squash. Prepared head SHA: 132a45728694053c0e3220e7d861508524f17244 Co-authored-by: gambletan <266203672+gambletan@users.noreply.github.com> Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com> Reviewed-by: @altaywtf --- ...dded-helpers.isbillingerrormessage.test.ts | 12 ++++ src/agents/pi-embedded-helpers/errors.ts | 2 +- .../pi-embedded-helpers/failover-matches.ts | 1 + src/cron/service.issue-regressions.test.ts | 55 +++++++++++++++++++ src/cron/service/timer.ts | 3 +- src/memory/manager-embedding-ops.ts | 2 +- src/memory/manager.embedding-batches.test.ts | 26 +++++++++ 7 files changed, 98 insertions(+), 3 deletions(-) diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 8649f46f8..86fd90e71 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -416,12 +416,19 @@ describe("isLikelyContextOverflowError", () => { "exceeded your current quota", "This request would exceed your account's rate limit", "429 Too Many Requests: request exceeds rate limit", + "AWS Bedrock: Too many tokens per day. Please try again tomorrow.", ]; for (const sample of samples) { expect(isLikelyContextOverflowError(sample)).toBe(false); } }); + it("keeps too-many-tokens-per-request context overflow errors out of the rate-limit lane", () => { + const sample = "Context window exceeded: too many tokens per request."; + expect(isLikelyContextOverflowError(sample)).toBe(true); + expect(classifyFailoverReason(sample)).toBeNull(); + }); + it("excludes reasoning-required invalid-request errors", () => { const samples = [ "400 Reasoning is mandatory for this endpoint and cannot be disabled.", @@ -654,6 +661,11 @@ describe("classifyFailoverReason", () => { "rate_limit", ); }); + it("classifies AWS Bedrock too-many-tokens-per-day errors as rate_limit", () => { + expect( + classifyFailoverReason("AWS Bedrock: Too many tokens per day. Please try again tomorrow."), + ).toBe("rate_limit"); + }); it("classifies provider high-demand / service-unavailable messages as overloaded", () => { expect( classifyFailoverReason( diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index cd4701c9d..4cf347150 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -122,7 +122,7 @@ const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i; const CONTEXT_OVERFLOW_HINT_RE = /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i; const RATE_LIMIT_HINT_RE = - /rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i; + /rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b|tokens per day/i; export function isLikelyContextOverflowError(errorMessage?: string): boolean { if (!errorMessage) { diff --git a/src/agents/pi-embedded-helpers/failover-matches.ts b/src/agents/pi-embedded-helpers/failover-matches.ts index 6a7ce9d51..f2e0e3870 100644 --- a/src/agents/pi-embedded-helpers/failover-matches.ts +++ b/src/agents/pi-embedded-helpers/failover-matches.ts @@ -14,6 +14,7 @@ const ERROR_PATTERNS = { "usage limit", /\btpm\b/i, "tokens per minute", + "tokens per day", ], overloaded: [ /overloaded_error|"type"\s*:\s*"overloaded_error"/i, diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index 9aec71b73..54f341bc7 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -800,6 +800,61 @@ describe("Cron issue regressions", () => { expect(runIsolatedAgentJob).toHaveBeenCalledTimes(2); }); + it("#38822: one-shot job retries Bedrock too-many-tokens-per-day errors", async () => { + const store = makeStorePath(); + const scheduledAt = Date.parse("2026-03-08T10:00:00.000Z"); + + const cronJob = createIsolatedRegressionJob({ + id: "oneshot-bedrock-too-many-tokens-per-day", + name: "reminder", + scheduledAt, + schedule: { kind: "at", at: new Date(scheduledAt).toISOString() }, + payload: { kind: "agentTurn", message: "remind me" }, + state: { nextRunAtMs: scheduledAt }, + }); + await writeCronJobs(store.storePath, [cronJob]); + + let now = scheduledAt; + const runIsolatedAgentJob = vi + .fn() + .mockResolvedValueOnce({ + status: "error", + error: "AWS Bedrock: Too many tokens per day. Please try again tomorrow.", + }) + .mockResolvedValueOnce({ status: "ok", summary: "done" }); + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob, + cronConfig: { + retry: { maxAttempts: 1, backoffMs: [1000], retryOn: ["rate_limit"] }, + }, + }); + + await onTimer(state); + const jobAfterRetry = state.store?.jobs.find( + (j) => j.id === "oneshot-bedrock-too-many-tokens-per-day", + ); + expect(jobAfterRetry).toBeDefined(); + expect(jobAfterRetry!.enabled).toBe(true); + expect(jobAfterRetry!.state.lastStatus).toBe("error"); + expect(jobAfterRetry!.state.nextRunAtMs).toBeGreaterThan(scheduledAt); + + now = (jobAfterRetry!.state.nextRunAtMs ?? now) + 1; + await onTimer(state); + + const finishedJob = state.store?.jobs.find( + (j) => j.id === "oneshot-bedrock-too-many-tokens-per-day", + ); + expect(finishedJob).toBeDefined(); + expect(finishedJob!.state.lastStatus).toBe("ok"); + expect(runIsolatedAgentJob).toHaveBeenCalledTimes(2); + }); + it("#24355: one-shot job disabled immediately on permanent error", async () => { const store = makeStorePath(); const scheduledAt = Date.parse("2026-02-06T10:00:00.000Z"); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 8502f3b6f..3f50ca757 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -119,7 +119,8 @@ function errorBackoffMs( const DEFAULT_MAX_TRANSIENT_RETRIES = 3; const TRANSIENT_PATTERNS: Record = { - rate_limit: /(rate[_ ]limit|too many requests|429|resource has been exhausted|cloudflare)/i, + rate_limit: + /(rate[_ ]limit|too many requests|429|resource has been exhausted|cloudflare|tokens per day)/i, overloaded: /\b529\b|\boverloaded(?:_error)?\b|high demand|temporar(?:ily|y) overloaded|capacity exceeded/i, network: /(network|econnreset|econnrefused|fetch failed|socket)/i, diff --git a/src/memory/manager-embedding-ops.ts b/src/memory/manager-embedding-ops.ts index 6da8b7ffa..965058c8a 100644 --- a/src/memory/manager-embedding-ops.ts +++ b/src/memory/manager-embedding-ops.ts @@ -532,7 +532,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps { } private isRetryableEmbeddingError(message: string): boolean { - return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare)/i.test( + return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test( message, ); } diff --git a/src/memory/manager.embedding-batches.test.ts b/src/memory/manager.embedding-batches.test.ts index 1326eca71..1d81744f2 100644 --- a/src/memory/manager.embedding-batches.test.ts +++ b/src/memory/manager.embedding-batches.test.ts @@ -103,6 +103,32 @@ describe("memory embedding batches", () => { expect(calls).toBe(3); }, 10000); + it("retries embeddings on too-many-tokens-per-day rate limits", async () => { + const memoryDir = fx.getMemoryDir(); + const managerSmall = fx.getManagerSmall(); + const line = "e".repeat(120); + const content = Array.from({ length: 4 }, () => line).join("\n"); + await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), content); + + let calls = 0; + embedBatch.mockImplementation(async (texts: string[]) => { + calls += 1; + if (calls === 1) { + throw new Error("AWS Bedrock embeddings failed: Too many tokens per day"); + } + return texts.map(() => [0, 1, 0]); + }); + + const restoreFastTimeouts = useFastShortTimeouts(); + try { + await managerSmall.sync({ reason: "test" }); + } finally { + restoreFastTimeouts(); + } + + expect(calls).toBe(2); + }, 10000); + it("skips empty chunks so embeddings input stays valid", async () => { const memoryDir = fx.getMemoryDir(); const managerSmall = fx.getManagerSmall();