From 32ee2f0109a4ecfe44c64969b7438cd30a6a3656 Mon Sep 17 00:00:00 2001 From: Madoka Date: Sat, 28 Feb 2026 08:41:08 +0800 Subject: [PATCH] fix(feishu): break infinite typing-indicator retry loop on rate-limit / quota errors (openclaw#28494) thanks @guoqunabc Verified: - pnpm build - pnpm check - pnpm test:macmini Co-authored-by: guoqunabc <9532020+guoqunabc@users.noreply.github.com> Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com> --- CHANGELOG.md | 1 + extensions/feishu/src/typing.test.ts | 144 +++++++++++++++++++++++++++ extensions/feishu/src/typing.ts | 123 ++++++++++++++++++++++- 3 files changed, 263 insertions(+), 5 deletions(-) create mode 100644 extensions/feishu/src/typing.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b5aee479e..5bc384662 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Feishu/Typing backoff: re-throw Feishu typing add/remove rate-limit and quota errors (`429`, `99991400`, `99991403`) and detect SDK non-throwing backoff responses so the typing keepalive circuit breaker can stop retries instead of looping indefinitely. (#28494) - Feishu/Probe status caching: cache successful `probeFeishu()` bot-info results for 10 minutes (bounded cache with per-account keying) to reduce repeated status/onboarding probe API calls, while bypassing cache for failures and exceptions. (#28907) Thanks @Glucksberg. - Feishu/Opus media send type: send `.opus` attachments with `msg_type: "audio"` (instead of `"media"`) so Feishu voice messages deliver correctly while `.mp4` remains `msg_type: "media"` and documents remain `msg_type: "file"`. (#28269) Thanks @Glucksberg. - Feishu/Local media sends: propagate `mediaLocalRoots` through Feishu outbound media sending into `loadWebMedia` so local path attachments work with post-CVE local-root enforcement. (#27884) Thanks @joelnishanth. diff --git a/extensions/feishu/src/typing.test.ts b/extensions/feishu/src/typing.test.ts new file mode 100644 index 000000000..437677f4b --- /dev/null +++ b/extensions/feishu/src/typing.test.ts @@ -0,0 +1,144 @@ +import { describe, expect, it } from "vitest"; +import { isFeishuBackoffError, getBackoffCodeFromResponse, FeishuBackoffError } from "./typing.js"; + +describe("isFeishuBackoffError", () => { + it("returns true for HTTP 429 (AxiosError shape)", () => { + const err = { response: { status: 429, data: {} } }; + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("returns true for Feishu quota exceeded code 99991403", () => { + const err = { response: { status: 200, data: { code: 99991403 } } }; + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("returns true for Feishu rate limit code 99991400", () => { + const err = { response: { status: 200, data: { code: 99991400 } } }; + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("returns true for SDK error with code 429", () => { + const err = { code: 429, message: "too many requests" }; + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("returns true for SDK error with top-level code 99991403", () => { + const err = { code: 99991403, message: "quota exceeded" }; + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("returns false for other HTTP errors (e.g. 500)", () => { + const err = { response: { status: 500, data: {} } }; + expect(isFeishuBackoffError(err)).toBe(false); + }); + + it("returns false for non-rate-limit Feishu codes", () => { + const err = { response: { status: 200, data: { code: 99991401 } } }; + expect(isFeishuBackoffError(err)).toBe(false); + }); + + it("returns false for generic Error", () => { + expect(isFeishuBackoffError(new Error("network timeout"))).toBe(false); + }); + + it("returns false for null", () => { + expect(isFeishuBackoffError(null)).toBe(false); + }); + + it("returns false for undefined", () => { + expect(isFeishuBackoffError(undefined)).toBe(false); + }); + + it("returns false for string", () => { + expect(isFeishuBackoffError("429")).toBe(false); + }); + + it("returns true for 429 even without data", () => { + const err = { response: { status: 429 } }; + expect(isFeishuBackoffError(err)).toBe(true); + }); +}); + +describe("getBackoffCodeFromResponse", () => { + it("returns backoff code for response with quota exceeded code", () => { + const response = { code: 99991403, msg: "quota exceeded", data: null }; + expect(getBackoffCodeFromResponse(response)).toBe(response.code); + }); + + it("returns backoff code for response with rate limit code", () => { + const response = { code: 99991400, msg: "rate limit", data: null }; + expect(getBackoffCodeFromResponse(response)).toBe(response.code); + }); + + it("returns backoff code for response with code 429", () => { + const response = { code: 429, msg: "too many requests", data: null }; + expect(getBackoffCodeFromResponse(response)).toBe(response.code); + }); + + it("returns undefined for successful response (code 0)", () => { + const response = { code: 0, msg: "success", data: { reaction_id: "r1" } }; + expect(getBackoffCodeFromResponse(response)).toBeUndefined(); + }); + + it("returns undefined for other error codes", () => { + const response = { code: 99991401, msg: "other error", data: null }; + expect(getBackoffCodeFromResponse(response)).toBeUndefined(); + }); + + it("returns undefined for null", () => { + expect(getBackoffCodeFromResponse(null)).toBeUndefined(); + }); + + it("returns undefined for undefined", () => { + expect(getBackoffCodeFromResponse(undefined)).toBeUndefined(); + }); + + it("returns undefined for response without code field", () => { + const response = { data: { reaction_id: "r1" } }; + expect(getBackoffCodeFromResponse(response)).toBeUndefined(); + }); +}); + +describe("FeishuBackoffError", () => { + it("is detected by isFeishuBackoffError via .code property", () => { + const err = new FeishuBackoffError(99991403); + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("is detected for rate limit code 99991400", () => { + const err = new FeishuBackoffError(99991400); + expect(isFeishuBackoffError(err)).toBe(true); + }); + + it("has correct name and message", () => { + const err = new FeishuBackoffError(99991403); + expect(err.name).toBe("FeishuBackoffError"); + expect(err.message).toBe("Feishu API backoff: code 99991403"); + expect(err.code).toBe(99991403); + }); + + it("is an instance of Error", () => { + const err = new FeishuBackoffError(99991403); + expect(err instanceof Error).toBe(true); + }); + + it("survives catch-and-rethrow pattern", () => { + // Simulates the exact pattern in addTypingIndicator/removeTypingIndicator: + // thrown inside try, caught by catch, isFeishuBackoffError must match + let caught: unknown; + try { + try { + throw new FeishuBackoffError(99991403); + } catch (err) { + if (isFeishuBackoffError(err)) { + throw err; // re-thrown — this is the fix + } + // would be silently swallowed with plain Error + caught = "swallowed"; + } + } catch (err) { + caught = err; + } + expect(caught).toBeInstanceOf(FeishuBackoffError); + }); +}); diff --git a/extensions/feishu/src/typing.ts b/extensions/feishu/src/typing.ts index af72d95f9..7dba5fc29 100644 --- a/extensions/feishu/src/typing.ts +++ b/extensions/feishu/src/typing.ts @@ -7,13 +7,97 @@ import { createFeishuClient } from "./client.js"; // Full list: https://github.com/go-lark/lark/blob/main/emoji.go const TYPING_EMOJI = "Typing"; // Typing indicator emoji +/** + * Feishu API error codes that indicate the caller should back off. + * These must propagate to the typing circuit breaker so the keepalive loop + * can trip and stop retrying. + * + * - 99991400: Rate limit (too many requests per second) + * - 99991403: Monthly API call quota exceeded + * - 429: Standard HTTP 429 returned as a Feishu SDK error code + * + * @see https://open.feishu.cn/document/server-docs/api-call-guide/generic-error-code + */ +const FEISHU_BACKOFF_CODES = new Set([99991400, 99991403, 429]); + +/** + * Custom error class for Feishu backoff conditions detected from non-throwing + * SDK responses. Carries a numeric `.code` so that `isFeishuBackoffError()` + * recognises it when the error is caught downstream. + */ +export class FeishuBackoffError extends Error { + code: number; + constructor(code: number) { + super(`Feishu API backoff: code ${code}`); + this.name = "FeishuBackoffError"; + this.code = code; + } +} + export type TypingIndicatorState = { messageId: string; reactionId: string | null; }; /** - * Add a typing indicator (reaction) to a message + * Check whether an error represents a rate-limit or quota-exceeded condition + * from the Feishu API that should stop the typing keepalive loop. + * + * Handles two shapes: + * 1. AxiosError with `response.status` and `response.data.code` + * 2. Feishu SDK error with a top-level `code` property + */ +export function isFeishuBackoffError(err: unknown): boolean { + if (typeof err !== "object" || err === null) { + return false; + } + + // AxiosError shape: err.response.status / err.response.data.code + const response = (err as { response?: { status?: number; data?: { code?: number } } }).response; + if (response) { + if (response.status === 429) { + return true; + } + if (typeof response.data?.code === "number" && FEISHU_BACKOFF_CODES.has(response.data.code)) { + return true; + } + } + + // Feishu SDK error shape: err.code + const code = (err as { code?: number }).code; + if (typeof code === "number" && FEISHU_BACKOFF_CODES.has(code)) { + return true; + } + + return false; +} + +/** + * Check whether a Feishu SDK response object contains a backoff error code. + * + * The Feishu SDK sometimes returns a normal response (no throw) with an + * API-level error code in the response body. This must be detected so the + * circuit breaker can trip. See codex review on #28157. + */ +export function getBackoffCodeFromResponse(response: unknown): number | undefined { + if (typeof response !== "object" || response === null) { + return undefined; + } + const code = (response as { code?: number }).code; + if (typeof code === "number" && FEISHU_BACKOFF_CODES.has(code)) { + return code; + } + return undefined; +} + +/** + * Add a typing indicator (reaction) to a message. + * + * Rate-limit and quota errors are re-thrown so the circuit breaker in + * `createTypingCallbacks` (typing-start-guard) can trip and stop the + * keepalive loop. See #28062. + * + * Also checks for backoff codes in non-throwing SDK responses (#28157). */ export async function addTypingIndicator(params: { cfg: ClawdbotConfig; @@ -36,18 +120,34 @@ export async function addTypingIndicator(params: { }, }); + // Feishu SDK may return a normal response with an API-level error code + // instead of throwing. Detect backoff codes and throw to trip the breaker. + const backoffCode = getBackoffCodeFromResponse(response); + if (backoffCode !== undefined) { + console.log( + `[feishu] typing indicator response contains backoff code ${backoffCode}, stopping keepalive`, + ); + throw new FeishuBackoffError(backoffCode); + } + // eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK response type const reactionId = (response as any)?.data?.reaction_id ?? null; return { messageId, reactionId }; } catch (err) { - // Silently fail - typing indicator is not critical + if (isFeishuBackoffError(err)) { + console.log(`[feishu] typing indicator hit rate-limit/quota, stopping keepalive`); + throw err; + } + // Silently fail for other non-critical errors (e.g. message deleted, permission issues) console.log(`[feishu] failed to add typing indicator: ${err}`); return { messageId, reactionId: null }; } } /** - * Remove a typing indicator (reaction) from a message + * Remove a typing indicator (reaction) from a message. + * + * Rate-limit and quota errors are re-thrown for the same reason as above. */ export async function removeTypingIndicator(params: { cfg: ClawdbotConfig; @@ -67,14 +167,27 @@ export async function removeTypingIndicator(params: { const client = createFeishuClient(account); try { - await client.im.messageReaction.delete({ + const result = await client.im.messageReaction.delete({ path: { message_id: state.messageId, reaction_id: state.reactionId, }, }); + + // Check for backoff codes in non-throwing SDK responses + const backoffCode = getBackoffCodeFromResponse(result); + if (backoffCode !== undefined) { + console.log( + `[feishu] typing indicator removal response contains backoff code ${backoffCode}, stopping keepalive`, + ); + throw new FeishuBackoffError(backoffCode); + } } catch (err) { - // Silently fail - cleanup is not critical + if (isFeishuBackoffError(err)) { + console.log(`[feishu] typing indicator removal hit rate-limit/quota, stopping keepalive`); + throw err; + } + // Silently fail for other non-critical errors console.log(`[feishu] failed to remove typing indicator: ${err}`); } }