From 7af6849c2f85e212dfcdeeabc7b1b4b82939db0e Mon Sep 17 00:00:00 2001 From: Theo Tarr Date: Sun, 22 Feb 2026 13:43:06 -0500 Subject: [PATCH] Discord: handle early gateway startup errors --- CHANGELOG.md | 1 + .../monitor/provider.lifecycle.test.ts | 105 ++++++++++++++++-- src/discord/monitor/provider.lifecycle.ts | 62 +++++++---- src/discord/monitor/provider.test.ts | 39 ++++++- src/discord/monitor/provider.ts | 34 ++++++ 5 files changed, 205 insertions(+), 36 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fe1e695a..5f4e3fb74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai - Telegram/Webhook: pre-initialize webhook bots, switch webhook processing to callback-mode JSON handling, and preserve full near-limit payload reads under delayed handlers to prevent webhook request hangs and dropped updates. (#26156) - Agents/Subagents delivery: refactor subagent completion announce dispatch into an explicit queue/direct/fallback state machine, recover outbound channel-plugin resolution in cold/stale plugin-registry states across announce/message/gateway send paths, finalize cleanup bookkeeping when announce flow rejects, and treat Telegram sends without `message_id` as delivery failures (instead of false-success `"unknown"` IDs). (#26867, #25961, #26803, #25069, #26741) Thanks @SmithLabsLLC and @docaohieu2808. - LINE/Lifecycle: keep LINE `startAccount` pending until abort so webhook startup is no longer misread as immediate channel exit, preventing restart-loop storms on LINE provider boot. (#26528) Thanks @Sid-Qin. +- Discord/Gateway: capture and drain startup-time gateway `error` events before lifecycle listeners attach so early `Fatal Gateway error: 4014` closes surface as actionable intent guidance instead of uncaught gateway crashes. (#23832) Thanks @theotarr. - Security/SSRF guard: classify IPv6 multicast literals (`ff00::/8`) as blocked/private-internal targets in shared SSRF IP checks, preventing multicast literals from bypassing URL-host preflight and DNS answer validation. This ships in the next npm release (`2026.2.25`). Thanks @zpbrent for reporting. - Slack/Session threads: prevent oversized parent-session inheritance from silently bricking new thread sessions, surface embedded context-overflow empty-result failures to users, and add configurable `session.parentForkMaxTokens` (default `100000`, `0` disables). (#26912) Thanks @markshields-tl. - Models/Auth probes: map permanent auth failover reasons (`auth_permanent`, for example revoked keys) into probe auth status instead of `unknown`, so `openclaw models status --probe` reports actionable auth failures. (#25754) thanks @rrenamed. diff --git a/src/discord/monitor/provider.lifecycle.test.ts b/src/discord/monitor/provider.lifecycle.test.ts index 9b74a0bad..e503d88cc 100644 --- a/src/discord/monitor/provider.lifecycle.test.ts +++ b/src/discord/monitor/provider.lifecycle.test.ts @@ -49,23 +49,33 @@ describe("runDiscordGatewayLifecycle", () => { accountId?: string; start?: () => Promise; stop?: () => Promise; + isDisallowedIntentsError?: (err: unknown) => boolean; + pendingGatewayErrors?: unknown[]; }) => { const start = vi.fn(params?.start ?? (async () => undefined)); const stop = vi.fn(params?.stop ?? (async () => undefined)); const threadStop = vi.fn(); + const runtimeError = vi.fn(); + const releaseEarlyGatewayErrorGuard = vi.fn(); return { start, stop, threadStop, + runtimeError, + releaseEarlyGatewayErrorGuard, lifecycleParams: { accountId: params?.accountId ?? "default", client: { getPlugin: vi.fn(() => undefined) } as unknown as Client, - runtime: {} as RuntimeEnv, - isDisallowedIntentsError: () => false, + runtime: { + error: runtimeError, + } as RuntimeEnv, + isDisallowedIntentsError: params?.isDisallowedIntentsError ?? (() => false), voiceManager: null, voiceManagerRef: { current: null }, execApprovalsHandler: { start, stop }, threadBindings: { stop: threadStop }, + pendingGatewayErrors: params?.pendingGatewayErrors, + releaseEarlyGatewayErrorGuard, }, }; }; @@ -75,6 +85,7 @@ describe("runDiscordGatewayLifecycle", () => { stop: ReturnType; threadStop: ReturnType; waitCalls: number; + releaseEarlyGatewayErrorGuard: ReturnType; }) { expect(params.start).toHaveBeenCalledTimes(1); expect(params.stop).toHaveBeenCalledTimes(1); @@ -82,39 +93,109 @@ describe("runDiscordGatewayLifecycle", () => { expect(unregisterGatewayMock).toHaveBeenCalledWith("default"); expect(stopGatewayLoggingMock).toHaveBeenCalledTimes(1); expect(params.threadStop).toHaveBeenCalledTimes(1); + expect(params.releaseEarlyGatewayErrorGuard).toHaveBeenCalledTimes(1); } it("cleans up thread bindings when exec approvals startup fails", async () => { const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js"); - const { lifecycleParams, start, stop, threadStop } = createLifecycleHarness({ - start: async () => { - throw new Error("startup failed"); - }, - }); + const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } = + createLifecycleHarness({ + start: async () => { + throw new Error("startup failed"); + }, + }); await expect(runDiscordGatewayLifecycle(lifecycleParams)).rejects.toThrow("startup failed"); - expectLifecycleCleanup({ start, stop, threadStop, waitCalls: 0 }); + expectLifecycleCleanup({ + start, + stop, + threadStop, + waitCalls: 0, + releaseEarlyGatewayErrorGuard, + }); }); it("cleans up when gateway wait fails after startup", async () => { const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js"); waitForDiscordGatewayStopMock.mockRejectedValueOnce(new Error("gateway wait failed")); - const { lifecycleParams, start, stop, threadStop } = createLifecycleHarness(); + const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } = + createLifecycleHarness(); await expect(runDiscordGatewayLifecycle(lifecycleParams)).rejects.toThrow( "gateway wait failed", ); - expectLifecycleCleanup({ start, stop, threadStop, waitCalls: 1 }); + expectLifecycleCleanup({ + start, + stop, + threadStop, + waitCalls: 1, + releaseEarlyGatewayErrorGuard, + }); }); it("cleans up after successful gateway wait", async () => { const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js"); - const { lifecycleParams, start, stop, threadStop } = createLifecycleHarness(); + const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } = + createLifecycleHarness(); await expect(runDiscordGatewayLifecycle(lifecycleParams)).resolves.toBeUndefined(); - expectLifecycleCleanup({ start, stop, threadStop, waitCalls: 1 }); + expectLifecycleCleanup({ + start, + stop, + threadStop, + waitCalls: 1, + releaseEarlyGatewayErrorGuard, + }); + }); + + it("handles queued disallowed intents errors without waiting for gateway events", async () => { + const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js"); + const { + lifecycleParams, + start, + stop, + threadStop, + runtimeError, + releaseEarlyGatewayErrorGuard, + } = createLifecycleHarness({ + pendingGatewayErrors: [new Error("Fatal Gateway error: 4014")], + isDisallowedIntentsError: (err) => String(err).includes("4014"), + }); + + await expect(runDiscordGatewayLifecycle(lifecycleParams)).resolves.toBeUndefined(); + + expect(runtimeError).toHaveBeenCalledWith( + expect.stringContaining("discord: gateway closed with code 4014"), + ); + expectLifecycleCleanup({ + start, + stop, + threadStop, + waitCalls: 0, + releaseEarlyGatewayErrorGuard, + }); + }); + + it("throws queued non-disallowed fatal gateway errors", async () => { + const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js"); + const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } = + createLifecycleHarness({ + pendingGatewayErrors: [new Error("Fatal Gateway error: 4000")], + }); + + await expect(runDiscordGatewayLifecycle(lifecycleParams)).rejects.toThrow( + "Fatal Gateway error: 4000", + ); + + expectLifecycleCleanup({ + start, + stop, + threadStop, + waitCalls: 0, + releaseEarlyGatewayErrorGuard, + }); }); }); diff --git a/src/discord/monitor/provider.lifecycle.ts b/src/discord/monitor/provider.lifecycle.ts index 8e5177bb9..489657d08 100644 --- a/src/discord/monitor/provider.lifecycle.ts +++ b/src/discord/monitor/provider.lifecycle.ts @@ -22,6 +22,8 @@ export async function runDiscordGatewayLifecycle(params: { voiceManagerRef: { current: DiscordVoiceManager | null }; execApprovalsHandler: ExecApprovalsHandler | null; threadBindings: { stop: () => void }; + pendingGatewayErrors?: unknown[]; + releaseEarlyGatewayErrorGuard?: () => void; }) { const gateway = params.client.getPlugin("gateway"); if (gateway) { @@ -74,11 +76,48 @@ export async function runDiscordGatewayLifecycle(params: { gatewayEmitter?.on("debug", onGatewayDebug); let sawDisallowedIntents = false; + const logGatewayError = (err: unknown) => { + if (params.isDisallowedIntentsError(err)) { + sawDisallowedIntents = true; + params.runtime.error?.( + danger( + "discord: gateway closed with code 4014 (missing privileged gateway intents). Enable the required intents in the Discord Developer Portal or disable them in config.", + ), + ); + return; + } + params.runtime.error?.(danger(`discord gateway error: ${String(err)}`)); + }; + const shouldStopOnGatewayError = (err: unknown) => { + const message = String(err); + return ( + message.includes("Max reconnect attempts") || + message.includes("Fatal Gateway error") || + params.isDisallowedIntentsError(err) + ); + }; try { if (params.execApprovalsHandler) { await params.execApprovalsHandler.start(); } + // Drain gateway errors emitted before lifecycle listeners were attached. + const pendingGatewayErrors = params.pendingGatewayErrors ?? []; + if (pendingGatewayErrors.length > 0) { + const queuedErrors = [...pendingGatewayErrors]; + pendingGatewayErrors.length = 0; + for (const err of queuedErrors) { + logGatewayError(err); + if (!shouldStopOnGatewayError(err)) { + continue; + } + if (params.isDisallowedIntentsError(err)) { + return; + } + throw err; + } + } + await waitForDiscordGatewayStop({ gateway: gateway ? { @@ -87,32 +126,15 @@ export async function runDiscordGatewayLifecycle(params: { } : undefined, abortSignal: params.abortSignal, - onGatewayError: (err) => { - if (params.isDisallowedIntentsError(err)) { - sawDisallowedIntents = true; - params.runtime.error?.( - danger( - "discord: gateway closed with code 4014 (missing privileged gateway intents). Enable the required intents in the Discord Developer Portal or disable them in config.", - ), - ); - return; - } - params.runtime.error?.(danger(`discord gateway error: ${String(err)}`)); - }, - shouldStopOnError: (err) => { - const message = String(err); - return ( - message.includes("Max reconnect attempts") || - message.includes("Fatal Gateway error") || - params.isDisallowedIntentsError(err) - ); - }, + onGatewayError: logGatewayError, + shouldStopOnError: shouldStopOnGatewayError, }); } catch (err) { if (!sawDisallowedIntents && !params.isDisallowedIntentsError(err)) { throw err; } } finally { + params.releaseEarlyGatewayErrorGuard?.(); unregisterGateway(params.accountId); stopGatewayLogging(); if (helloTimeoutId) { diff --git a/src/discord/monitor/provider.test.ts b/src/discord/monitor/provider.test.ts index 14b137fd1..db998ac67 100644 --- a/src/discord/monitor/provider.test.ts +++ b/src/discord/monitor/provider.test.ts @@ -1,8 +1,11 @@ +import { EventEmitter } from "node:events"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../config/config.js"; import type { RuntimeEnv } from "../../runtime.js"; const { + clientFetchUserMock, + clientGetPluginMock, createDiscordNativeCommandMock, createNoopThreadBindingManagerMock, createThreadBindingManagerMock, @@ -17,6 +20,8 @@ const { } = vi.hoisted(() => { const createdBindingManagers: Array<{ stop: ReturnType }> = []; return { + clientFetchUserMock: vi.fn(async () => ({ id: "bot-1" })), + clientGetPluginMock: vi.fn(() => undefined), createDiscordNativeCommandMock: vi.fn(() => ({ name: "mock-command" })), createNoopThreadBindingManagerMock: vi.fn(() => { const manager = { stop: vi.fn() }; @@ -65,11 +70,11 @@ vi.mock("@buape/carbon", () => { async handleDeployRequest() { return undefined; } - async fetchUser(_target: string) { - return { id: "bot-1" }; + async fetchUser(target: string) { + return await clientFetchUserMock(target); } - getPlugin(_name: string) { - return undefined; + getPlugin(name: string) { + return clientGetPluginMock(name); } } return { Client, ReadyListener }; @@ -242,6 +247,8 @@ describe("monitorDiscordProvider", () => { }) as OpenClawConfig; beforeEach(() => { + clientFetchUserMock.mockClear().mockResolvedValue({ id: "bot-1" }); + clientGetPluginMock.mockClear().mockReturnValue(undefined); createDiscordNativeCommandMock.mockClear().mockReturnValue({ name: "mock-command" }); createNoopThreadBindingManagerMock.mockClear(); createThreadBindingManagerMock.mockClear(); @@ -290,4 +297,28 @@ describe("monitorDiscordProvider", () => { expect(createdBindingManagers).toHaveLength(1); expect(createdBindingManagers[0]?.stop).toHaveBeenCalledTimes(1); }); + + it("captures gateway errors emitted before lifecycle wait starts", async () => { + const { monitorDiscordProvider } = await import("./provider.js"); + const emitter = new EventEmitter(); + clientGetPluginMock.mockImplementation((name: string) => + name === "gateway" ? { emitter, disconnect: vi.fn() } : undefined, + ); + clientFetchUserMock.mockImplementationOnce(async () => { + emitter.emit("error", new Error("Fatal Gateway error: 4014")); + return { id: "bot-1" }; + }); + + await monitorDiscordProvider({ + config: baseConfig(), + runtime: baseRuntime(), + }); + + expect(monitorLifecycleMock).toHaveBeenCalledTimes(1); + const lifecycleArgs = monitorLifecycleMock.mock.calls[0]?.[0] as { + pendingGatewayErrors?: unknown[]; + }; + expect(lifecycleArgs.pendingGatewayErrors).toHaveLength(1); + expect(String(lifecycleArgs.pendingGatewayErrors?.[0])).toContain("4014"); + }); }); diff --git a/src/discord/monitor/provider.ts b/src/discord/monitor/provider.ts index 629f8a3e7..2239503a5 100644 --- a/src/discord/monitor/provider.ts +++ b/src/discord/monitor/provider.ts @@ -34,6 +34,7 @@ import { createDiscordRetryRunner } from "../../infra/retry-policy.js"; import { createSubsystemLogger } from "../../logging/subsystem.js"; import { createNonExitingRuntime, type RuntimeEnv } from "../../runtime.js"; import { resolveDiscordAccount } from "../accounts.js"; +import { getDiscordGatewayEmitter } from "../monitor.gateway.js"; import { fetchDiscordApplicationId } from "../probe.js"; import { normalizeDiscordToken } from "../token.js"; import { createDiscordVoiceCommand } from "../voice/command.js"; @@ -229,6 +230,33 @@ function isDiscordDisallowedIntentsError(err: unknown): boolean { return message.includes(String(DISCORD_DISALLOWED_INTENTS_CODE)); } +type EarlyGatewayErrorGuard = { + pendingErrors: unknown[]; + release: () => void; +}; + +function attachEarlyGatewayErrorGuard(client: Client): EarlyGatewayErrorGuard { + const pendingErrors: unknown[] = []; + const gateway = client.getPlugin("gateway"); + const emitter = getDiscordGatewayEmitter(gateway); + if (!emitter) { + return { + pendingErrors, + release: () => {}, + }; + } + const onGatewayError = (err: unknown) => { + pendingErrors.push(err); + }; + emitter.on("error", onGatewayError); + return { + pendingErrors, + release: () => { + emitter.removeListener("error", onGatewayError); + }, + }; +} + export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { const cfg = opts.config ?? loadConfig(); const account = resolveDiscordAccount({ @@ -365,6 +393,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { }) : createNoopThreadBindingManager(account.accountId); let lifecycleStarted = false; + let releaseEarlyGatewayErrorGuard = () => {}; try { const commands: BaseCommand[] = commandSpecs.map((spec) => createDiscordNativeCommand({ @@ -496,6 +525,8 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { }, clientPlugins, ); + const earlyGatewayErrorGuard = attachEarlyGatewayErrorGuard(client); + releaseEarlyGatewayErrorGuard = earlyGatewayErrorGuard.release; await deployDiscordCommands({ client, runtime, enabled: nativeEnabled }); @@ -612,8 +643,11 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) { voiceManagerRef, execApprovalsHandler, threadBindings, + pendingGatewayErrors: earlyGatewayErrorGuard.pendingErrors, + releaseEarlyGatewayErrorGuard, }); } finally { + releaseEarlyGatewayErrorGuard(); if (!lifecycleStarted) { threadBindings.stop(); }