Feishu: cache failing probes (#29970)

* Feishu: cache failing probes
* Changelog: add Feishu probe failure backoff note
---------
Co-authored-by: bmendonca3 <208517100+bmendonca3@users.noreply.github.com>
Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
2026-03-02 18:37:07 -07:00
parent 1ca69c8fd7
commit cdc1ef85e8
3 changed files with 96 additions and 57 deletions
--- a/extensions/feishu/src/probe.test.ts
+++ b/extensions/feishu/src/probe.test.ts
@@ -59,7 +59,7 @@ describe("probeFeishu", () => {
    expect(requestFn).toHaveBeenCalledTimes(1);
  });

-  it("uses explicit timeout for bot info request", async () => {
+  it("passes the probe timeout to the Feishu request", async () => {
    const requestFn = setupClient({
      code: 0,
      bot: { bot_name: "TestBot", open_id: "ou_abc123" },
@@ -105,7 +105,6 @@ describe("probeFeishu", () => {
    expect(result).toMatchObject({ ok: false, error: "probe aborted" });
    expect(createFeishuClientMock).not.toHaveBeenCalled();
  });
-
  it("returns cached result on subsequent calls within TTL", async () => {
    const requestFn = setupClient({
      code: 0,
@@ -133,7 +132,7 @@ describe("probeFeishu", () => {
      await probeFeishu(creds);
      expect(requestFn).toHaveBeenCalledTimes(1);

-      // Advance time past the 10-minute TTL
+      // Advance time past the success TTL
      vi.advanceTimersByTime(10 * 60 * 1000 + 1);

      await probeFeishu(creds);
@@ -143,29 +142,48 @@ describe("probeFeishu", () => {
    }
  });

-  it("does not cache failed probe results (API error)", async () => {
-    const requestFn = makeRequestFn({ code: 99, msg: "token expired" });
-    createFeishuClientMock.mockReturnValue({ request: requestFn });
+  it("caches failed probe results (API error) for the error TTL", async () => {
+    vi.useFakeTimers();
+    try {
+      const requestFn = makeRequestFn({ code: 99, msg: "token expired" });
+      createFeishuClientMock.mockReturnValue({ request: requestFn });

-    const creds = { appId: "cli_123", appSecret: "secret" };
-    const first = await probeFeishu(creds);
-    expect(first).toMatchObject({ ok: false, error: "API error: token expired" });
+      const creds = { appId: "cli_123", appSecret: "secret" };
+      const first = await probeFeishu(creds);
+      const second = await probeFeishu(creds);
+      expect(first).toMatchObject({ ok: false, error: "API error: token expired" });
+      expect(second).toMatchObject({ ok: false, error: "API error: token expired" });
+      expect(requestFn).toHaveBeenCalledTimes(1);

-    // Second call should make a fresh request since failures are not cached
-    await probeFeishu(creds);
-    expect(requestFn).toHaveBeenCalledTimes(2);
+      vi.advanceTimersByTime(60 * 1000 + 1);
+
+      await probeFeishu(creds);
+      expect(requestFn).toHaveBeenCalledTimes(2);
+    } finally {
+      vi.useRealTimers();
+    }
  });

-  it("does not cache results when request throws", async () => {
-    const requestFn = vi.fn().mockRejectedValue(new Error("network error"));
-    createFeishuClientMock.mockReturnValue({ request: requestFn });
+  it("caches thrown request errors for the error TTL", async () => {
+    vi.useFakeTimers();
+    try {
+      const requestFn = vi.fn().mockRejectedValue(new Error("network error"));
+      createFeishuClientMock.mockReturnValue({ request: requestFn });

-    const creds = { appId: "cli_123", appSecret: "secret" };
-    const first = await probeFeishu(creds);
-    expect(first).toMatchObject({ ok: false, error: "network error" });
+      const creds = { appId: "cli_123", appSecret: "secret" };
+      const first = await probeFeishu(creds);
+      const second = await probeFeishu(creds);
+      expect(first).toMatchObject({ ok: false, error: "network error" });
+      expect(second).toMatchObject({ ok: false, error: "network error" });
+      expect(requestFn).toHaveBeenCalledTimes(1);

-    await probeFeishu(creds);
-    expect(requestFn).toHaveBeenCalledTimes(2);
+      vi.advanceTimersByTime(60 * 1000 + 1);
+
+      await probeFeishu(creds);
+      expect(requestFn).toHaveBeenCalledTimes(2);
+    } finally {
+      vi.useRealTimers();
+    }
  });

  it("caches per account independently", async () => {
--- a/extensions/feishu/src/probe.ts
+++ b/extensions/feishu/src/probe.ts
@@ -2,15 +2,16 @@ import { raceWithTimeoutAndAbort } from "./async.js";
 import { createFeishuClient, type FeishuClientCredentials } from "./client.js";
 import type { FeishuProbeResult } from "./types.js";

-/** Cache successful probe results to reduce API calls (bot info is static).
+/** Cache probe results to reduce repeated health-check calls.
 * Gateway health checks call probeFeishu() every minute; without caching this
 * burns ~43,200 calls/month, easily exceeding Feishu's free-tier quota.
- * A 10-min TTL cuts that to ~4,320 calls/month. (#26684) */
+ * Successful bot info is effectively static and is cached for 10 minutes;
+ * failures are cached for 1 minute to avoid hammering the API during
+ * transient outages. */
 const probeCache = new Map<string, { result: FeishuProbeResult; expiresAt: number }>();
-const PROBE_CACHE_TTL_MS = 10 * 60 * 1000; // 10 minutes
+const PROBE_SUCCESS_TTL_MS = 10 * 60 * 1000; // 10 minutes
+const PROBE_ERROR_TTL_MS = 60 * 1000; // 1 minute
 const MAX_PROBE_CACHE_SIZE = 64;
 export const FEISHU_PROBE_REQUEST_TIMEOUT_MS = 10_000;
-
 export type ProbeFeishuOptions = {
  timeoutMs?: number;
  abortSignal?: AbortSignal;
@@ -23,6 +24,21 @@ type FeishuBotInfoResponse = {
  data?: { bot?: { bot_name?: string; open_id?: string } };
 };

+function setCachedProbeResult( // Stores `result` in probeCache under `cacheKey`, trims the cache if it grew past its cap, and returns `result`.
+  cacheKey: string, // key the entry is stored under in probeCache
+  result: FeishuProbeResult, // probe outcome to remember; call sites in this file pass both ok:true and ok:false results
+  ttlMs: number, // entry lifetime in milliseconds, added to the current time to form expiresAt
+): FeishuProbeResult {
+  probeCache.set(cacheKey, { result, expiresAt: Date.now() + ttlMs }); // overwriting an existing key keeps its original position in the Map's iteration order
+  if (probeCache.size > MAX_PROBE_CACHE_SIZE) { // bound the cache: drop one entry whenever the cap is exceeded
+    const oldest = probeCache.keys().next().value; // a Map iterates in insertion order, so the first key is the earliest-inserted one (FIFO, not LRU)
+    if (oldest !== undefined) {
+      probeCache.delete(oldest);
+    }
+  }
+  return result; // the same object that was passed in, so call sites can write `return setCachedProbeResult(...)`
+}
+
 export async function probeFeishu(
  creds?: FeishuClientCredentials,
  options: ProbeFeishuOptions = {},
@@ -78,11 +94,15 @@ export async function probeFeishu(
      };
    }
    if (responseResult.status === "timeout") {
-      return {
-        ok: false,
-        appId: creds.appId,
-        error: `probe timed out after ${timeoutMs}ms`,
-      };
+      return setCachedProbeResult(
+        cacheKey,
+        {
+          ok: false,
+          appId: creds.appId,
+          error: `probe timed out after ${timeoutMs}ms`,
+        },
+        PROBE_ERROR_TTL_MS,
+      );
    }

    const response = responseResult.value;
@@ -95,38 +115,38 @@ export async function probeFeishu(
    }

    if (response.code !== 0) {
-      return {
-        ok: false,
-        appId: creds.appId,
-        error: `API error: ${response.msg || `code ${response.code}`}`,
-      };
+      return setCachedProbeResult(
+        cacheKey,
+        {
+          ok: false,
+          appId: creds.appId,
+          error: `API error: ${response.msg || `code ${response.code}`}`,
+        },
+        PROBE_ERROR_TTL_MS,
+      );
    }

    const bot = response.bot || response.data?.bot;
-    const result: FeishuProbeResult = {
-      ok: true,
-      appId: creds.appId,
-      botName: bot?.bot_name,
-      botOpenId: bot?.open_id,
-    };
-
-    // Cache successful results only
-    probeCache.set(cacheKey, { result, expiresAt: Date.now() + PROBE_CACHE_TTL_MS });
-    // Evict oldest entry if cache exceeds max size
-    if (probeCache.size > MAX_PROBE_CACHE_SIZE) {
-      const oldest = probeCache.keys().next().value;
-      if (oldest !== undefined) {
-        probeCache.delete(oldest);
-      }
-    }
-
-    return result;
+    return setCachedProbeResult(
+      cacheKey,
+      {
+        ok: true,
+        appId: creds.appId,
+        botName: bot?.bot_name,
+        botOpenId: bot?.open_id,
+      },
+      PROBE_SUCCESS_TTL_MS,
+    );
  } catch (err) {
-    return {
-      ok: false,
-      appId: creds.appId,
-      error: err instanceof Error ? err.message : String(err),
-    };
+    return setCachedProbeResult(
+      cacheKey,
+      {
+        ok: false,
+        appId: creds.appId,
+        error: err instanceof Error ? err.message : String(err),
+      },
+      PROBE_ERROR_TTL_MS,
+    );
  }
 }