fix(feishu): break infinite typing-indicator retry loop on rate-limit / quota errors (openclaw#28494) thanks @guoqunabc

Verified:
- pnpm build
- pnpm check
- pnpm test:macmini

Co-authored-by: guoqunabc <9532020+guoqunabc@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
This commit is contained in:
Madoka
2026-02-28 08:41:08 +08:00
committed by GitHub
parent 0e755ad99a
commit 32ee2f0109
3 changed files with 263 additions and 5 deletions

View File

@@ -15,6 +15,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Feishu/Typing backoff: re-throw Feishu typing add/remove rate-limit and quota errors (`429`, `99991400`, `99991403`) and detect SDK non-throwing backoff responses so the typing keepalive circuit breaker can stop retries instead of looping indefinitely. (#28494)
- Feishu/Probe status caching: cache successful `probeFeishu()` bot-info results for 10 minutes (bounded cache with per-account keying) to reduce repeated status/onboarding probe API calls, while bypassing cache for failures and exceptions. (#28907) Thanks @Glucksberg.
- Feishu/Opus media send type: send `.opus` attachments with `msg_type: "audio"` (instead of `"media"`) so Feishu voice messages deliver correctly while `.mp4` remains `msg_type: "media"` and documents remain `msg_type: "file"`. (#28269) Thanks @Glucksberg.
- Feishu/Local media sends: propagate `mediaLocalRoots` through Feishu outbound media sending into `loadWebMedia` so local path attachments work with post-CVE local-root enforcement. (#27884) Thanks @joelnishanth.

View File

@@ -0,0 +1,144 @@
import { describe, expect, it } from "vitest";
import { isFeishuBackoffError, getBackoffCodeFromResponse, FeishuBackoffError } from "./typing.js";
describe("isFeishuBackoffError", () => {
it("returns true for HTTP 429 (AxiosError shape)", () => {
const err = { response: { status: 429, data: {} } };
expect(isFeishuBackoffError(err)).toBe(true);
});
it("returns true for Feishu quota exceeded code 99991403", () => {
const err = { response: { status: 200, data: { code: 99991403 } } };
expect(isFeishuBackoffError(err)).toBe(true);
});
it("returns true for Feishu rate limit code 99991400", () => {
const err = { response: { status: 200, data: { code: 99991400 } } };
expect(isFeishuBackoffError(err)).toBe(true);
});
it("returns true for SDK error with code 429", () => {
const err = { code: 429, message: "too many requests" };
expect(isFeishuBackoffError(err)).toBe(true);
});
it("returns true for SDK error with top-level code 99991403", () => {
const err = { code: 99991403, message: "quota exceeded" };
expect(isFeishuBackoffError(err)).toBe(true);
});
it("returns false for other HTTP errors (e.g. 500)", () => {
const err = { response: { status: 500, data: {} } };
expect(isFeishuBackoffError(err)).toBe(false);
});
it("returns false for non-rate-limit Feishu codes", () => {
const err = { response: { status: 200, data: { code: 99991401 } } };
expect(isFeishuBackoffError(err)).toBe(false);
});
it("returns false for generic Error", () => {
expect(isFeishuBackoffError(new Error("network timeout"))).toBe(false);
});
it("returns false for null", () => {
expect(isFeishuBackoffError(null)).toBe(false);
});
it("returns false for undefined", () => {
expect(isFeishuBackoffError(undefined)).toBe(false);
});
it("returns false for string", () => {
expect(isFeishuBackoffError("429")).toBe(false);
});
it("returns true for 429 even without data", () => {
const err = { response: { status: 429 } };
expect(isFeishuBackoffError(err)).toBe(true);
});
});
describe("getBackoffCodeFromResponse", () => {
it("returns backoff code for response with quota exceeded code", () => {
const response = { code: 99991403, msg: "quota exceeded", data: null };
expect(getBackoffCodeFromResponse(response)).toBe(response.code);
});
it("returns backoff code for response with rate limit code", () => {
const response = { code: 99991400, msg: "rate limit", data: null };
expect(getBackoffCodeFromResponse(response)).toBe(response.code);
});
it("returns backoff code for response with code 429", () => {
const response = { code: 429, msg: "too many requests", data: null };
expect(getBackoffCodeFromResponse(response)).toBe(response.code);
});
it("returns undefined for successful response (code 0)", () => {
const response = { code: 0, msg: "success", data: { reaction_id: "r1" } };
expect(getBackoffCodeFromResponse(response)).toBeUndefined();
});
it("returns undefined for other error codes", () => {
const response = { code: 99991401, msg: "other error", data: null };
expect(getBackoffCodeFromResponse(response)).toBeUndefined();
});
it("returns undefined for null", () => {
expect(getBackoffCodeFromResponse(null)).toBeUndefined();
});
it("returns undefined for undefined", () => {
expect(getBackoffCodeFromResponse(undefined)).toBeUndefined();
});
it("returns undefined for response without code field", () => {
const response = { data: { reaction_id: "r1" } };
expect(getBackoffCodeFromResponse(response)).toBeUndefined();
});
});
describe("FeishuBackoffError", () => {
it("is detected by isFeishuBackoffError via .code property", () => {
const err = new FeishuBackoffError(99991403);
expect(isFeishuBackoffError(err)).toBe(true);
});
it("is detected for rate limit code 99991400", () => {
const err = new FeishuBackoffError(99991400);
expect(isFeishuBackoffError(err)).toBe(true);
});
it("has correct name and message", () => {
const err = new FeishuBackoffError(99991403);
expect(err.name).toBe("FeishuBackoffError");
expect(err.message).toBe("Feishu API backoff: code 99991403");
expect(err.code).toBe(99991403);
});
it("is an instance of Error", () => {
const err = new FeishuBackoffError(99991403);
expect(err instanceof Error).toBe(true);
});
it("survives catch-and-rethrow pattern", () => {
// Simulates the exact pattern in addTypingIndicator/removeTypingIndicator:
// thrown inside try, caught by catch, isFeishuBackoffError must match
let caught: unknown;
try {
try {
throw new FeishuBackoffError(99991403);
} catch (err) {
if (isFeishuBackoffError(err)) {
throw err; // re-thrown — this is the fix
}
// would be silently swallowed with plain Error
caught = "swallowed";
}
} catch (err) {
caught = err;
}
expect(caught).toBeInstanceOf(FeishuBackoffError);
});
});

View File

@@ -7,13 +7,97 @@ import { createFeishuClient } from "./client.js";
// Full list: https://github.com/go-lark/lark/blob/main/emoji.go
const TYPING_EMOJI = "Typing"; // Typing indicator emoji
/**
* Feishu API error codes that indicate the caller should back off.
* These must propagate to the typing circuit breaker so the keepalive loop
* can trip and stop retrying.
*
* - 99991400: Rate limit (too many requests per second)
* - 99991403: Monthly API call quota exceeded
* - 429: Standard HTTP 429 returned as a Feishu SDK error code
*
* @see https://open.feishu.cn/document/server-docs/api-call-guide/generic-error-code
*/
const FEISHU_BACKOFF_CODES = new Set([99991400, 99991403, 429]);
/**
* Custom error class for Feishu backoff conditions detected from non-throwing
* SDK responses. Carries a numeric `.code` so that `isFeishuBackoffError()`
* recognises it when the error is caught downstream.
*/
export class FeishuBackoffError extends Error {
code: number;
constructor(code: number) {
super(`Feishu API backoff: code ${code}`);
this.name = "FeishuBackoffError";
this.code = code;
}
}
export type TypingIndicatorState = {
messageId: string;
reactionId: string | null;
};
/**
* Add a typing indicator (reaction) to a message
* Check whether an error represents a rate-limit or quota-exceeded condition
* from the Feishu API that should stop the typing keepalive loop.
*
* Handles two shapes:
* 1. AxiosError with `response.status` and `response.data.code`
* 2. Feishu SDK error with a top-level `code` property
*/
export function isFeishuBackoffError(err: unknown): boolean {
if (typeof err !== "object" || err === null) {
return false;
}
// AxiosError shape: err.response.status / err.response.data.code
const response = (err as { response?: { status?: number; data?: { code?: number } } }).response;
if (response) {
if (response.status === 429) {
return true;
}
if (typeof response.data?.code === "number" && FEISHU_BACKOFF_CODES.has(response.data.code)) {
return true;
}
}
// Feishu SDK error shape: err.code
const code = (err as { code?: number }).code;
if (typeof code === "number" && FEISHU_BACKOFF_CODES.has(code)) {
return true;
}
return false;
}
/**
* Check whether a Feishu SDK response object contains a backoff error code.
*
* The Feishu SDK sometimes returns a normal response (no throw) with an
* API-level error code in the response body. This must be detected so the
* circuit breaker can trip. See codex review on #28157.
*/
export function getBackoffCodeFromResponse(response: unknown): number | undefined {
if (typeof response !== "object" || response === null) {
return undefined;
}
const code = (response as { code?: number }).code;
if (typeof code === "number" && FEISHU_BACKOFF_CODES.has(code)) {
return code;
}
return undefined;
}
/**
* Add a typing indicator (reaction) to a message.
*
* Rate-limit and quota errors are re-thrown so the circuit breaker in
* `createTypingCallbacks` (typing-start-guard) can trip and stop the
* keepalive loop. See #28062.
*
* Also checks for backoff codes in non-throwing SDK responses (#28157).
*/
export async function addTypingIndicator(params: {
cfg: ClawdbotConfig;
@@ -36,18 +120,34 @@ export async function addTypingIndicator(params: {
},
});
// Feishu SDK may return a normal response with an API-level error code
// instead of throwing. Detect backoff codes and throw to trip the breaker.
const backoffCode = getBackoffCodeFromResponse(response);
if (backoffCode !== undefined) {
console.log(
`[feishu] typing indicator response contains backoff code ${backoffCode}, stopping keepalive`,
);
throw new FeishuBackoffError(backoffCode);
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- SDK response type
const reactionId = (response as any)?.data?.reaction_id ?? null;
return { messageId, reactionId };
} catch (err) {
// Silently fail - typing indicator is not critical
if (isFeishuBackoffError(err)) {
console.log(`[feishu] typing indicator hit rate-limit/quota, stopping keepalive`);
throw err;
}
// Silently fail for other non-critical errors (e.g. message deleted, permission issues)
console.log(`[feishu] failed to add typing indicator: ${err}`);
return { messageId, reactionId: null };
}
}
/**
* Remove a typing indicator (reaction) from a message
* Remove a typing indicator (reaction) from a message.
*
* Rate-limit and quota errors are re-thrown for the same reason as above.
*/
export async function removeTypingIndicator(params: {
cfg: ClawdbotConfig;
@@ -67,14 +167,27 @@ export async function removeTypingIndicator(params: {
const client = createFeishuClient(account);
try {
await client.im.messageReaction.delete({
const result = await client.im.messageReaction.delete({
path: {
message_id: state.messageId,
reaction_id: state.reactionId,
},
});
// Check for backoff codes in non-throwing SDK responses
const backoffCode = getBackoffCodeFromResponse(result);
if (backoffCode !== undefined) {
console.log(
`[feishu] typing indicator removal response contains backoff code ${backoffCode}, stopping keepalive`,
);
throw new FeishuBackoffError(backoffCode);
}
} catch (err) {
// Silently fail - cleanup is not critical
if (isFeishuBackoffError(err)) {
console.log(`[feishu] typing indicator removal hit rate-limit/quota, stopping keepalive`);
throw err;
}
// Silently fail for other non-critical errors
console.log(`[feishu] failed to remove typing indicator: ${err}`);
}
}