fix: add rate limit patterns for 'too many tokens' and 'tokens per day' (#39377)
Merged via squash.

Prepared head SHA: 132a45728694053c0e3220e7d861508524f17244
Co-authored-by: gambletan <266203672+gambletan@users.noreply.github.com>
Co-authored-by: altaywtf <9790196+altaywtf@users.noreply.github.com>
Reviewed-by: @altaywtf
This commit is contained in:
@@ -416,12 +416,19 @@ describe("isLikelyContextOverflowError", () => {
|
||||
"exceeded your current quota",
|
||||
"This request would exceed your account's rate limit",
|
||||
"429 Too Many Requests: request exceeds rate limit",
|
||||
"AWS Bedrock: Too many tokens per day. Please try again tomorrow.",
|
||||
];
|
||||
for (const sample of samples) {
|
||||
expect(isLikelyContextOverflowError(sample)).toBe(false);
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps too-many-tokens-per-request context overflow errors out of the rate-limit lane", () => {
|
||||
const sample = "Context window exceeded: too many tokens per request.";
|
||||
expect(isLikelyContextOverflowError(sample)).toBe(true);
|
||||
expect(classifyFailoverReason(sample)).toBeNull();
|
||||
});
|
||||
|
||||
it("excludes reasoning-required invalid-request errors", () => {
|
||||
const samples = [
|
||||
"400 Reasoning is mandatory for this endpoint and cannot be disabled.",
|
||||
@@ -654,6 +661,11 @@ describe("classifyFailoverReason", () => {
|
||||
"rate_limit",
|
||||
);
|
||||
});
|
||||
it("classifies AWS Bedrock too-many-tokens-per-day errors as rate_limit", () => {
|
||||
expect(
|
||||
classifyFailoverReason("AWS Bedrock: Too many tokens per day. Please try again tomorrow."),
|
||||
).toBe("rate_limit");
|
||||
});
|
||||
it("classifies provider high-demand / service-unavailable messages as overloaded", () => {
|
||||
expect(
|
||||
classifyFailoverReason(
|
||||
|
||||
@@ -122,7 +122,7 @@ const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;
|
||||
const CONTEXT_OVERFLOW_HINT_RE =
|
||||
/context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
|
||||
const RATE_LIMIT_HINT_RE =
|
||||
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;
|
||||
/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b|tokens per day/i;
|
||||
|
||||
export function isLikelyContextOverflowError(errorMessage?: string): boolean {
|
||||
if (!errorMessage) {
|
||||
|
||||
@@ -14,6 +14,7 @@ const ERROR_PATTERNS = {
|
||||
"usage limit",
|
||||
/\btpm\b/i,
|
||||
"tokens per minute",
|
||||
"tokens per day",
|
||||
],
|
||||
overloaded: [
|
||||
/overloaded_error|"type"\s*:\s*"overloaded_error"/i,
|
||||
|
||||
@@ -800,6 +800,61 @@ describe("Cron issue regressions", () => {
|
||||
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("#38822: one-shot job retries Bedrock too-many-tokens-per-day errors", async () => {
|
||||
const store = makeStorePath();
|
||||
const scheduledAt = Date.parse("2026-03-08T10:00:00.000Z");
|
||||
|
||||
const cronJob = createIsolatedRegressionJob({
|
||||
id: "oneshot-bedrock-too-many-tokens-per-day",
|
||||
name: "reminder",
|
||||
scheduledAt,
|
||||
schedule: { kind: "at", at: new Date(scheduledAt).toISOString() },
|
||||
payload: { kind: "agentTurn", message: "remind me" },
|
||||
state: { nextRunAtMs: scheduledAt },
|
||||
});
|
||||
await writeCronJobs(store.storePath, [cronJob]);
|
||||
|
||||
let now = scheduledAt;
|
||||
const runIsolatedAgentJob = vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
status: "error",
|
||||
error: "AWS Bedrock: Too many tokens per day. Please try again tomorrow.",
|
||||
})
|
||||
.mockResolvedValueOnce({ status: "ok", summary: "done" });
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: store.storePath,
|
||||
log: noopLogger,
|
||||
nowMs: () => now,
|
||||
enqueueSystemEvent: vi.fn(),
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob,
|
||||
cronConfig: {
|
||||
retry: { maxAttempts: 1, backoffMs: [1000], retryOn: ["rate_limit"] },
|
||||
},
|
||||
});
|
||||
|
||||
await onTimer(state);
|
||||
const jobAfterRetry = state.store?.jobs.find(
|
||||
(j) => j.id === "oneshot-bedrock-too-many-tokens-per-day",
|
||||
);
|
||||
expect(jobAfterRetry).toBeDefined();
|
||||
expect(jobAfterRetry!.enabled).toBe(true);
|
||||
expect(jobAfterRetry!.state.lastStatus).toBe("error");
|
||||
expect(jobAfterRetry!.state.nextRunAtMs).toBeGreaterThan(scheduledAt);
|
||||
|
||||
now = (jobAfterRetry!.state.nextRunAtMs ?? now) + 1;
|
||||
await onTimer(state);
|
||||
|
||||
const finishedJob = state.store?.jobs.find(
|
||||
(j) => j.id === "oneshot-bedrock-too-many-tokens-per-day",
|
||||
);
|
||||
expect(finishedJob).toBeDefined();
|
||||
expect(finishedJob!.state.lastStatus).toBe("ok");
|
||||
expect(runIsolatedAgentJob).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("#24355: one-shot job disabled immediately on permanent error", async () => {
|
||||
const store = makeStorePath();
|
||||
const scheduledAt = Date.parse("2026-02-06T10:00:00.000Z");
|
||||
|
||||
@@ -119,7 +119,8 @@ function errorBackoffMs(
|
||||
const DEFAULT_MAX_TRANSIENT_RETRIES = 3;
|
||||
|
||||
const TRANSIENT_PATTERNS: Record<string, RegExp> = {
|
||||
rate_limit: /(rate[_ ]limit|too many requests|429|resource has been exhausted|cloudflare)/i,
|
||||
rate_limit:
|
||||
/(rate[_ ]limit|too many requests|429|resource has been exhausted|cloudflare|tokens per day)/i,
|
||||
overloaded:
|
||||
/\b529\b|\boverloaded(?:_error)?\b|high demand|temporar(?:ily|y) overloaded|capacity exceeded/i,
|
||||
network: /(network|econnreset|econnrefused|fetch failed|socket)/i,
|
||||
|
||||
@@ -532,7 +532,7 @@ export abstract class MemoryManagerEmbeddingOps extends MemoryManagerSyncOps {
|
||||
}
|
||||
|
||||
private isRetryableEmbeddingError(message: string): boolean {
|
||||
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare)/i.test(
|
||||
return /(rate[_ ]limit|too many requests|429|resource has been exhausted|5\d\d|cloudflare|tokens per day)/i.test(
|
||||
message,
|
||||
);
|
||||
}
|
||||
|
||||
@@ -103,6 +103,32 @@ describe("memory embedding batches", () => {
|
||||
expect(calls).toBe(3);
|
||||
}, 10000);
|
||||
|
||||
it("retries embeddings on too-many-tokens-per-day rate limits", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
const line = "e".repeat(120);
|
||||
const content = Array.from({ length: 4 }, () => line).join("\n");
|
||||
await fs.writeFile(path.join(memoryDir, "2026-01-08.md"), content);
|
||||
|
||||
let calls = 0;
|
||||
embedBatch.mockImplementation(async (texts: string[]) => {
|
||||
calls += 1;
|
||||
if (calls === 1) {
|
||||
throw new Error("AWS Bedrock embeddings failed: Too many tokens per day");
|
||||
}
|
||||
return texts.map(() => [0, 1, 0]);
|
||||
});
|
||||
|
||||
const restoreFastTimeouts = useFastShortTimeouts();
|
||||
try {
|
||||
await managerSmall.sync({ reason: "test" });
|
||||
} finally {
|
||||
restoreFastTimeouts();
|
||||
}
|
||||
|
||||
expect(calls).toBe(2);
|
||||
}, 10000);
|
||||
|
||||
it("skips empty chunks so embeddings input stays valid", async () => {
|
||||
const memoryDir = fx.getMemoryDir();
|
||||
const managerSmall = fx.getManagerSmall();
|
||||
|
||||
Reference in New Issue
Block a user