fix: add rate limit patterns for 'too many tokens' and 'tokens per day' (#39377)

Merged via squash.

Prepared head SHA: 132a45728694053c0e3220e7d861508524f17244
Co-authored-by: gambletan <266203672+gambletan@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
gambletan
2026-03-08 18:03:33 +08:00
committed by GitHub
parent aedf3ee68f
commit 8a20f51460
7 changed files with 98 additions and 3 deletions

View File

@@ -416,12 +416,19 @@ describe("isLikelyContextOverflowError", () => {
"exceeded your current quota",
"This request would exceed your account's rate limit",
"429 Too Many Requests: request exceeds rate limit",
"AWS Bedrock: Too many tokens per day. Please try again tomorrow.",
];
for (const sample of samples) {
expect(isLikelyContextOverflowError(sample)).toBe(false);
}
});
// A "too many tokens per request" message must be detected as a context
// overflow and must not be given a failover reason.
it("keeps too-many-tokens-per-request context overflow errors out of the rate-limit lane", () => {
  const overflowMessage = "Context window exceeded: too many tokens per request.";

  const looksLikeOverflow = isLikelyContextOverflowError(overflowMessage);
  expect(looksLikeOverflow).toBe(true);

  const failoverReason = classifyFailoverReason(overflowMessage);
  expect(failoverReason).toBeNull();
});
it("excludes reasoning-required invalid-request errors", () => {
const samples = [
"400 Reasoning is mandatory for this endpoint and cannot be disabled.",
@@ -654,6 +661,11 @@ describe("classifyFailoverReason", () => {
"rate_limit",
);
});
// The daily token cap message reported by AWS Bedrock must classify as "rate_limit".
it("classifies AWS Bedrock too-many-tokens-per-day errors as rate_limit", () => {
  const dailyLimitMessage = "AWS Bedrock: Too many tokens per day. Please try again tomorrow.";
  const reason = classifyFailoverReason(dailyLimitMessage);
  expect(reason).toBe("rate_limit");
});
it("classifies provider high-demand / service-unavailable messages as overloaded", () => {
expect(
classifyFailoverReason(

View File

@@ -122,7 +122,7 @@ const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;
// Case-insensitive hint for messages that describe a context overflow: an
// explicit "context ... overflow", or a context window / prompt / request /
// input that is too large, too long, exceeded, over, or at a limit/maximum.
const CONTEXT_OVERFLOW_HINT_RE =
/context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
// Case-insensitive hint for rate-limit / quota wording ("rate limit",
// "too many requests", "requests per minute|hour|day", "quota", "throttl…",
// "429", and the daily token cap phrase "tokens per day").
// NOTE(review): presumably used by isLikelyContextOverflowError to keep
// rate-limit messages from being treated as overflow — confirm against its body.
const RATE_LIMIT_HINT_RE =
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b|tokens per day/i;
export function isLikelyContextOverflowError(errorMessage?: string): boolean {
if (!errorMessage) {

View File

@@ -14,6 +14,7 @@ const ERROR_PATTERNS = {
"usage limit",
/\btpm\b/i,
"tokens per minute",
"tokens per day",
],
overloaded: [
/overloaded_error|"type"\s*:\s*"overloaded_error"/i,

View File

@@ -800,6 +800,61 @@ describe("Cron issue regressions", () => {
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(2);
});
// Regression for #38822: a one-shot job whose first run fails with the Bedrock
// daily token cap message is kept enabled, rescheduled, and succeeds on retry.
it("#38822: one-shot job retries Bedrock too-many-tokens-per-day errors", async () => {
  const jobId = "oneshot-bedrock-too-many-tokens-per-day";
  const storeFixture = makeStorePath();
  const firstRunAt = Date.parse("2026-03-08T10:00:00.000Z");

  const oneShotJob = createIsolatedRegressionJob({
    id: jobId,
    name: "reminder",
    scheduledAt: firstRunAt,
    schedule: { kind: "at", at: new Date(firstRunAt).toISOString() },
    payload: { kind: "agentTurn", message: "remind me" },
    state: { nextRunAtMs: firstRunAt },
  });
  await writeCronJobs(storeFixture.storePath, [oneShotJob]);

  // Mutable clock that the cron service reads through `nowMs`.
  let clockMs = firstRunAt;

  // First invocation reports the Bedrock error; the second one succeeds.
  const agentRunner = vi
    .fn()
    .mockResolvedValueOnce({
      status: "error",
      error: "AWS Bedrock: Too many tokens per day. Please try again tomorrow.",
    })
    .mockResolvedValueOnce({ status: "ok", summary: "done" });

  const state = createCronServiceState({
    cronEnabled: true,
    storePath: storeFixture.storePath,
    log: noopLogger,
    nowMs: () => clockMs,
    enqueueSystemEvent: vi.fn(),
    requestHeartbeatNow: vi.fn(),
    runIsolatedAgentJob: agentRunner,
    cronConfig: {
      retry: { maxAttempts: 1, backoffMs: [1000], retryOn: ["rate_limit"] },
    },
  });

  const lookupJob = () => state.store?.jobs.find((candidate) => candidate.id === jobId);

  // First tick: the run fails, and the job must remain enabled with a later run time.
  await onTimer(state);
  const afterFailure = lookupJob();
  expect(afterFailure).toBeDefined();
  expect(afterFailure?.enabled).toBe(true);
  expect(afterFailure?.state.lastStatus).toBe("error");
  expect(afterFailure?.state.nextRunAtMs).toBeGreaterThan(firstRunAt);

  // Move the clock just past the rescheduled time and tick again.
  clockMs = (afterFailure?.state.nextRunAtMs ?? clockMs) + 1;
  await onTimer(state);

  const afterRetry = lookupJob();
  expect(afterRetry).toBeDefined();
  expect(afterRetry?.state.lastStatus).toBe("ok");
  expect(agentRunner).toHaveBeenCalledTimes(2);
});
it("#24355: one-shot job disabled immediately on permanent error", async () => {
const store = makeStorePath();
const scheduledAt = Date.parse("2026-02-06T10:00:00.000Z");

View File

@@ -119,7 +119,8 @@ function errorBackoffMs(
const DEFAULT_MAX_TRANSIENT_RETRIES = 3;
const TRANSIENT_PATTERNS: Record<string, RegExp> = {
rate_limit: /(rate[_ ]limit|too many requests|429|resource has been exhausted|cloudflare)/i,
rate_limit:
/(rate[_ ]limit|too many requests|429|resource has been exhausted|cloudflare|tokens per day)/i,
overloaded:
/\b529\b|\boverloaded(?:_error)?\b|high demand|temporar(?:ily|y) overloaded|capacity exceeded/i,
network: /(network|econnreset|econnrefused|fetch failed|socket)/i,

View File

@@ -532,7 +532,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
}
/**
 * Reports whether an embedding error message matches the retryable pattern.
 *
 * The pattern is case-insensitive and unanchored. It covers rate-limit wording
 * ("rate limit" / "rate_limit", "too many requests", "429",
 * "resource has been exhausted", "tokens per day"), any digit 5 followed by two
 * more digits, and "cloudflare".
 *
 * NOTE(review): `429` and `5\d\d` have no word boundaries, so they also match
 * inside longer digit runs (for example "1500") — confirm this is intended.
 *
 * @param message - Error text to test.
 * @returns `true` when the pattern matches anywhere in `message`.
 */
private isRetryableEmbeddingError(message: string): boolean {
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare)/i.test(
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
message,
);
}

View File

@@ -103,6 +103,32 @@ describe("memory embedding batches", () => {
expect(calls).toBe(3);
}, 10000);
// The first embedBatch call throws the daily token cap error; sync must call
// embedBatch once more, so exactly two calls are expected.
it("retries embeddings on too-many-tokens-per-day rate limits", async () => {
  const memoryDir = fx.getMemoryDir();
  const managerSmall = fx.getManagerSmall();

  // Four identical 120-character lines, newline-separated.
  const repeatedLine = "e".repeat(120);
  const fileBody = new Array<string>(4).fill(repeatedLine).join("\n");
  await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), fileBody);

  let attempts = 0;
  embedBatch.mockImplementation(async (texts: string[]) => {
    attempts += 1;
    if (attempts > 1) {
      return texts.map(() => [0, 1, 0]);
    }
    throw new Error("AWS Bedrock embeddings failed: Too many tokens per day");
  });

  const restoreFastTimeouts = useFastShortTimeouts();
  try {
    await managerSmall.sync({ reason: "test" });
  } finally {
    // Always restore the timeouts, even if sync rejects.
    restoreFastTimeouts();
  }

  expect(attempts).toBe(2);
}, 10000);
it("skips empty chunks so embeddings input stays valid", async () => {
const memoryDir = fx.getMemoryDir();
const managerSmall = fx.getManagerSmall();