Discord: handle early gateway startup errors

This commit is contained in:
Theo Tarr
2026-02-22 13:43:06 -05:00
committed by Peter Steinberger
parent 9b81a53016
commit 7af6849c2f
5 changed files with 205 additions and 36 deletions

View File

@@ -24,6 +24,7 @@ Docs: https://docs.openclaw.ai
- Telegram/Webhook: pre-initialize webhook bots, switch webhook processing to callback-mode JSON handling, and preserve full near-limit payload reads under delayed handlers to prevent webhook request hangs and dropped updates. (#26156)
- Agents/Subagents delivery: refactor subagent completion announce dispatch into an explicit queue/direct/fallback state machine, recover outbound channel-plugin resolution in cold/stale plugin-registry states across announce/message/gateway send paths, finalize cleanup bookkeeping when announce flow rejects, and treat Telegram sends without `message_id` as delivery failures (instead of false-success `"unknown"` IDs). (#26867, #25961, #26803, #25069, #26741) Thanks @SmithLabsLLC and @docaohieu2808.
- LINE/Lifecycle: keep LINE `startAccount` pending until abort so webhook startup is no longer misread as immediate channel exit, preventing restart-loop storms on LINE provider boot. (#26528) Thanks @Sid-Qin.
- Discord/Gateway: capture and drain startup-time gateway `error` events before lifecycle listeners attach so early `Fatal Gateway error: 4014` closes surface as actionable intent guidance instead of uncaught gateway crashes. (#23832) Thanks @theotarr.
- Security/SSRF guard: classify IPv6 multicast literals (`ff00::/8`) as blocked/private-internal targets in shared SSRF IP checks, preventing multicast literals from bypassing URL-host preflight and DNS answer validation. This ships in the next npm release (`2026.2.25`). Thanks @zpbrent for reporting.
- Slack/Session threads: prevent oversized parent-session inheritance from silently bricking new thread sessions, surface embedded context-overflow empty-result failures to users, and add configurable `session.parentForkMaxTokens` (default `100000`, `0` disables). (#26912) Thanks @markshields-tl.
- Models/Auth probes: map permanent auth failover reasons (`auth_permanent`, for example revoked keys) into probe auth status instead of `unknown`, so `openclaw models status --probe` reports actionable auth failures. (#25754) Thanks @rrenamed.

View File

@@ -49,23 +49,33 @@ describe("runDiscordGatewayLifecycle", () => {
accountId?: string;
start?: () => Promise<void>;
stop?: () => Promise<void>;
isDisallowedIntentsError?: (err: unknown) => boolean;
pendingGatewayErrors?: unknown[];
}) => {
const start = vi.fn(params?.start ?? (async () => undefined));
const stop = vi.fn(params?.stop ?? (async () => undefined));
const threadStop = vi.fn();
const runtimeError = vi.fn();
const releaseEarlyGatewayErrorGuard = vi.fn();
return {
start,
stop,
threadStop,
runtimeError,
releaseEarlyGatewayErrorGuard,
lifecycleParams: {
accountId: params?.accountId ?? "default",
client: { getPlugin: vi.fn(() => undefined) } as unknown as Client,
runtime: {} as RuntimeEnv,
isDisallowedIntentsError: () => false,
runtime: {
error: runtimeError,
} as RuntimeEnv,
isDisallowedIntentsError: params?.isDisallowedIntentsError ?? (() => false),
voiceManager: null,
voiceManagerRef: { current: null },
execApprovalsHandler: { start, stop },
threadBindings: { stop: threadStop },
pendingGatewayErrors: params?.pendingGatewayErrors,
releaseEarlyGatewayErrorGuard,
},
};
};
@@ -75,6 +85,7 @@ describe("runDiscordGatewayLifecycle", () => {
stop: ReturnType<typeof vi.fn>;
threadStop: ReturnType<typeof vi.fn>;
waitCalls: number;
releaseEarlyGatewayErrorGuard: ReturnType<typeof vi.fn>;
}) {
expect(params.start).toHaveBeenCalledTimes(1);
expect(params.stop).toHaveBeenCalledTimes(1);
@@ -82,39 +93,109 @@ describe("runDiscordGatewayLifecycle", () => {
expect(unregisterGatewayMock).toHaveBeenCalledWith("default");
expect(stopGatewayLoggingMock).toHaveBeenCalledTimes(1);
expect(params.threadStop).toHaveBeenCalledTimes(1);
expect(params.releaseEarlyGatewayErrorGuard).toHaveBeenCalledTimes(1);
}
it("cleans up thread bindings when exec approvals startup fails", async () => {
const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js");
const { lifecycleParams, start, stop, threadStop } = createLifecycleHarness({
start: async () => {
throw new Error("startup failed");
},
});
const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } =
createLifecycleHarness({
start: async () => {
throw new Error("startup failed");
},
});
await expect(runDiscordGatewayLifecycle(lifecycleParams)).rejects.toThrow("startup failed");
expectLifecycleCleanup({ start, stop, threadStop, waitCalls: 0 });
expectLifecycleCleanup({
start,
stop,
threadStop,
waitCalls: 0,
releaseEarlyGatewayErrorGuard,
});
});
it("cleans up when gateway wait fails after startup", async () => {
const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js");
waitForDiscordGatewayStopMock.mockRejectedValueOnce(new Error("gateway wait failed"));
const { lifecycleParams, start, stop, threadStop } = createLifecycleHarness();
const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } =
createLifecycleHarness();
await expect(runDiscordGatewayLifecycle(lifecycleParams)).rejects.toThrow(
"gateway wait failed",
);
expectLifecycleCleanup({ start, stop, threadStop, waitCalls: 1 });
expectLifecycleCleanup({
start,
stop,
threadStop,
waitCalls: 1,
releaseEarlyGatewayErrorGuard,
});
});
it("cleans up after successful gateway wait", async () => {
const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js");
const { lifecycleParams, start, stop, threadStop } = createLifecycleHarness();
const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } =
createLifecycleHarness();
await expect(runDiscordGatewayLifecycle(lifecycleParams)).resolves.toBeUndefined();
expectLifecycleCleanup({ start, stop, threadStop, waitCalls: 1 });
expectLifecycleCleanup({
start,
stop,
threadStop,
waitCalls: 1,
releaseEarlyGatewayErrorGuard,
});
});
// Scenario: a 4014 gateway error is already sitting in `pendingGatewayErrors` when the lifecycle
// starts. The lifecycle is expected to resolve (not reject), report the intents guidance through
// `runtime.error`, and still perform the shared cleanup checks.
it("handles queued disallowed intents errors without waiting for gateway events", async () => {
const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js");
const {
lifecycleParams,
start,
stop,
threadStop,
runtimeError,
releaseEarlyGatewayErrorGuard,
} = createLifecycleHarness({
// Pre-queued error, as if it had been captured before lifecycle listeners were attached.
pendingGatewayErrors: [new Error("Fatal Gateway error: 4014")],
// Minimal stand-in classifier: any error whose string form mentions 4014 counts as disallowed intents.
isDisallowedIntentsError: (err) => String(err).includes("4014"),
});
await expect(runDiscordGatewayLifecycle(lifecycleParams)).resolves.toBeUndefined();
// The user-facing guidance message must have been emitted via the runtime error sink.
expect(runtimeError).toHaveBeenCalledWith(
expect.stringContaining("discord: gateway closed with code 4014"),
);
// `waitCalls: 0` is passed because the queued error should end the lifecycle before the gateway wait.
expectLifecycleCleanup({
start,
stop,
threadStop,
waitCalls: 0,
releaseEarlyGatewayErrorGuard,
});
});
// Scenario: a queued fatal gateway error that is NOT a disallowed-intents error (the harness default
// classifier returns false). The lifecycle is expected to reject with that error and still run cleanup.
it("throws queued non-disallowed fatal gateway errors", async () => {
const { runDiscordGatewayLifecycle } = await import("./provider.lifecycle.js");
const { lifecycleParams, start, stop, threadStop, releaseEarlyGatewayErrorGuard } =
createLifecycleHarness({
// "Fatal Gateway error" matches the lifecycle's stop-on-error check; 4000 is not the intents code.
pendingGatewayErrors: [new Error("Fatal Gateway error: 4000")],
});
await expect(runDiscordGatewayLifecycle(lifecycleParams)).rejects.toThrow(
"Fatal Gateway error: 4000",
);
// `waitCalls: 0` is passed because the queued error should surface before the gateway wait.
expectLifecycleCleanup({
start,
stop,
threadStop,
waitCalls: 0,
releaseEarlyGatewayErrorGuard,
});
});
});

View File

@@ -22,6 +22,8 @@ export async function runDiscordGatewayLifecycle(params: {
voiceManagerRef: { current: DiscordVoiceManager | null };
execApprovalsHandler: ExecApprovalsHandler | null;
threadBindings: { stop: () => void };
pendingGatewayErrors?: unknown[];
releaseEarlyGatewayErrorGuard?: () => void;
}) {
const gateway = params.client.getPlugin<GatewayPlugin>("gateway");
if (gateway) {
@@ -74,11 +76,48 @@ export async function runDiscordGatewayLifecycle(params: {
gatewayEmitter?.on("debug", onGatewayDebug);
let sawDisallowedIntents = false;
// Reports a gateway error through `params.runtime.error` (when that sink is defined).
// Disallowed-intents errors additionally set `sawDisallowedIntents` and are reported with
// fixed guidance text; every other error is reported with its string form.
const logGatewayError = (err: unknown) => {
  if (!params.isDisallowedIntentsError(err)) {
    // Generic path: surface the raw error text.
    params.runtime.error?.(danger(`discord gateway error: ${String(err)}`));
    return;
  }
  // Remember that the intents failure was seen so the surrounding catch can treat it as handled.
  sawDisallowedIntents = true;
  params.runtime.error?.(
    danger(
      "discord: gateway closed with code 4014 (missing privileged gateway intents). Enable the required intents in the Discord Developer Portal or disable them in config.",
    ),
  );
};
// Decides whether a gateway error should end the lifecycle wait: true when the error's string
// form mentions exhausted reconnects or a fatal gateway error, otherwise whatever
// `params.isDisallowedIntentsError` reports for it.
const shouldStopOnGatewayError = (err: unknown) => {
  const text = String(err);
  if (text.includes("Max reconnect attempts")) {
    return true;
  }
  if (text.includes("Fatal Gateway error")) {
    return true;
  }
  // Only consulted when neither text marker matched.
  return params.isDisallowedIntentsError(err);
};
try {
if (params.execApprovalsHandler) {
await params.execApprovalsHandler.start();
}
// Drain gateway errors emitted before lifecycle listeners were attached.
const pendingGatewayErrors = params.pendingGatewayErrors ?? [];
if (pendingGatewayErrors.length > 0) {
const queuedErrors = [...pendingGatewayErrors];
pendingGatewayErrors.length = 0;
for (const err of queuedErrors) {
logGatewayError(err);
if (!shouldStopOnGatewayError(err)) {
continue;
}
if (params.isDisallowedIntentsError(err)) {
return;
}
throw err;
}
}
await waitForDiscordGatewayStop({
gateway: gateway
? {
@@ -87,32 +126,15 @@ export async function runDiscordGatewayLifecycle(params: {
}
: undefined,
abortSignal: params.abortSignal,
onGatewayError: (err) => {
if (params.isDisallowedIntentsError(err)) {
sawDisallowedIntents = true;
params.runtime.error?.(
danger(
"discord: gateway closed with code 4014 (missing privileged gateway intents). Enable the required intents in the Discord Developer Portal or disable them in config.",
),
);
return;
}
params.runtime.error?.(danger(`discord gateway error: ${String(err)}`));
},
shouldStopOnError: (err) => {
const message = String(err);
return (
message.includes("Max reconnect attempts") ||
message.includes("Fatal Gateway error") ||
params.isDisallowedIntentsError(err)
);
},
onGatewayError: logGatewayError,
shouldStopOnError: shouldStopOnGatewayError,
});
} catch (err) {
if (!sawDisallowedIntents && !params.isDisallowedIntentsError(err)) {
throw err;
}
} finally {
params.releaseEarlyGatewayErrorGuard?.();
unregisterGateway(params.accountId);
stopGatewayLogging();
if (helloTimeoutId) {

View File

@@ -1,8 +1,11 @@
import { EventEmitter } from "node:events";
import { beforeEach, describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../config/config.js";
import type { RuntimeEnv } from "../../runtime.js";
const {
clientFetchUserMock,
clientGetPluginMock,
createDiscordNativeCommandMock,
createNoopThreadBindingManagerMock,
createThreadBindingManagerMock,
@@ -17,6 +20,8 @@ const {
} = vi.hoisted(() => {
const createdBindingManagers: Array<{ stop: ReturnType<typeof vi.fn> }> = [];
return {
clientFetchUserMock: vi.fn(async () => ({ id: "bot-1" })),
clientGetPluginMock: vi.fn(() => undefined),
createDiscordNativeCommandMock: vi.fn(() => ({ name: "mock-command" })),
createNoopThreadBindingManagerMock: vi.fn(() => {
const manager = { stop: vi.fn() };
@@ -65,11 +70,11 @@ vi.mock("@buape/carbon", () => {
async handleDeployRequest() {
return undefined;
}
async fetchUser(_target: string) {
return { id: "bot-1" };
async fetchUser(target: string) {
return await clientFetchUserMock(target);
}
getPlugin(_name: string) {
return undefined;
getPlugin(name: string) {
return clientGetPluginMock(name);
}
}
return { Client, ReadyListener };
@@ -242,6 +247,8 @@ describe("monitorDiscordProvider", () => {
}) as OpenClawConfig;
beforeEach(() => {
clientFetchUserMock.mockClear().mockResolvedValue({ id: "bot-1" });
clientGetPluginMock.mockClear().mockReturnValue(undefined);
createDiscordNativeCommandMock.mockClear().mockReturnValue({ name: "mock-command" });
createNoopThreadBindingManagerMock.mockClear();
createThreadBindingManagerMock.mockClear();
@@ -290,4 +297,28 @@ describe("monitorDiscordProvider", () => {
expect(createdBindingManagers).toHaveLength(1);
expect(createdBindingManagers[0]?.stop).toHaveBeenCalledTimes(1);
});
// Scenario: the gateway emitter fires an `error` event during the mocked `fetchUser` call.
// The provider is expected to hand that error to the (mocked) lifecycle via `pendingGatewayErrors`.
it("captures gateway errors emitted before lifecycle wait starts", async () => {
const { monitorDiscordProvider } = await import("./provider.js");
const emitter = new EventEmitter();
// Expose a fake gateway plugin whose emitter the test controls.
clientGetPluginMock.mockImplementation((name: string) =>
name === "gateway" ? { emitter, disconnect: vi.fn() } : undefined,
);
// Emit the fatal error from inside fetchUser, then return the usual bot user.
clientFetchUserMock.mockImplementationOnce(async () => {
emitter.emit("error", new Error("Fatal Gateway error: 4014"));
return { id: "bot-1" };
});
await monitorDiscordProvider({
config: baseConfig(),
runtime: baseRuntime(),
});
expect(monitorLifecycleMock).toHaveBeenCalledTimes(1);
// Inspect the first argument of the single lifecycle call.
const lifecycleArgs = monitorLifecycleMock.mock.calls[0]?.[0] as {
pendingGatewayErrors?: unknown[];
};
// Exactly one error should have been captured, and it should be the 4014 error emitted above.
expect(lifecycleArgs.pendingGatewayErrors).toHaveLength(1);
expect(String(lifecycleArgs.pendingGatewayErrors?.[0])).toContain("4014");
});
});

View File

@@ -34,6 +34,7 @@ import { createDiscordRetryRunner } from "../../infra/retry-policy.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { createNonExitingRuntime, type RuntimeEnv } from "../../runtime.js";
import { resolveDiscordAccount } from "../accounts.js";
import { getDiscordGatewayEmitter } from "../monitor.gateway.js";
import { fetchDiscordApplicationId } from "../probe.js";
import { normalizeDiscordToken } from "../token.js";
import { createDiscordVoiceCommand } from "../voice/command.js";
@@ -229,6 +230,33 @@ function isDiscordDisallowedIntentsError(err: unknown): boolean {
return message.includes(String(DISCORD_DISALLOWED_INTENTS_CODE));
}
/** Handle returned by `attachEarlyGatewayErrorGuard`. */
type EarlyGatewayErrorGuard = {
// Gateway `error` event payloads collected while the guard's listener is attached.
pendingErrors: unknown[];
// Removes the guard's `error` listener; a no-op when no gateway emitter was available.
release: () => void;
};
/**
 * Attaches an `error` listener to the client's gateway emitter that collects every emitted
 * error into an array, so errors raised before other listeners exist are kept for later handling.
 *
 * @param client - Client whose "gateway" plugin is looked up.
 * @returns The array that receives captured errors, plus a `release` callback that removes the
 *   listener. When no emitter is available the array stays empty and `release` does nothing.
 */
function attachEarlyGatewayErrorGuard(client: Client): EarlyGatewayErrorGuard {
  const captured: unknown[] = [];
  const gatewayEmitter = getDiscordGatewayEmitter(client.getPlugin<GatewayPlugin>("gateway"));
  if (gatewayEmitter) {
    const capture = (err: unknown) => {
      captured.push(err);
    };
    gatewayEmitter.on("error", capture);
    return {
      pendingErrors: captured,
      release: () => {
        gatewayEmitter.removeListener("error", capture);
      },
    };
  }
  // No emitter to listen on: hand back an inert guard.
  return {
    pendingErrors: captured,
    release: () => {},
  };
}
export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
const cfg = opts.config ?? loadConfig();
const account = resolveDiscordAccount({
@@ -365,6 +393,7 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
})
: createNoopThreadBindingManager(account.accountId);
let lifecycleStarted = false;
let releaseEarlyGatewayErrorGuard = () => {};
try {
const commands: BaseCommand[] = commandSpecs.map((spec) =>
createDiscordNativeCommand({
@@ -496,6 +525,8 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
},
clientPlugins,
);
const earlyGatewayErrorGuard = attachEarlyGatewayErrorGuard(client);
releaseEarlyGatewayErrorGuard = earlyGatewayErrorGuard.release;
await deployDiscordCommands({ client, runtime, enabled: nativeEnabled });
@@ -612,8 +643,11 @@ export async function monitorDiscordProvider(opts: MonitorDiscordOpts = {}) {
voiceManagerRef,
execApprovalsHandler,
threadBindings,
pendingGatewayErrors: earlyGatewayErrorGuard.pendingErrors,
releaseEarlyGatewayErrorGuard,
});
} finally {
releaseEarlyGatewayErrorGuard();
if (!lifecycleStarted) {
threadBindings.stop();
}