From dd07c06d003b0192375d49a2d6842a5be617230b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 22 Feb 2026 10:36:11 +0100 Subject: [PATCH] fix: tighten gateway restart loop handling (#23416) (thanks @jeffwnli) --- CHANGELOG.md | 1 + src/cli/gateway-cli/run-loop.test.ts | 10 ++++---- src/cli/gateway-cli/run-loop.ts | 37 +++++++++++++++++++++++----- src/infra/infra-parsing.test.ts | 11 +++++++++ src/infra/is-main.ts | 10 ++++++++ src/shared/pid-alive.test.ts | 12 ++++++--- 6 files changed, 67 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a723a5be8..824e5a8d1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai ### Fixes +- Gateway/Restart: fix restart-loop edge cases by keeping `openclaw.mjs -> dist/entry.js` bootstrap detection explicit, reacquiring the gateway lock for in-process restart fallback paths, and tightening restart-loop regression coverage. (#23416) Thanks @jeffwnli. - Security/Audit: add `openclaw security audit` detection for open group policies that expose runtime/filesystem tools without sandbox/workspace guards (`security.exposure.open_groups_with_runtime_or_fs`). - Security/Exec env: block request-scoped `HOME` and `ZDOTDIR` overrides in host exec env sanitizers (Node + macOS), preventing shell startup-file execution before allowlist-evaluated command bodies. This ships in the next npm release. Thanks @tdjackey for reporting. - Security/Gateway: emit a startup security warning when insecure/dangerous config flags are enabled (including `gateway.controlUi.dangerouslyDisableDeviceAuth=true`) and point operators to `openclaw security audit`. diff --git a/src/cli/gateway-cli/run-loop.test.ts b/src/cli/gateway-cli/run-loop.test.ts index 74f6835be..c814f5dc9 100644 --- a/src/cli/gateway-cli/run-loop.test.ts +++ b/src/cli/gateway-cli/run-loop.test.ts @@ -11,7 +11,9 @@ const markGatewaySigusr1RestartHandled = vi.fn(); const getActiveTaskCount = vi.fn(() => 0); const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => ({ drained: true })); const resetAllLanes = vi.fn(); -const restartGatewayProcessWithFreshPid = vi.fn(() => ({ mode: "skipped" as const })); +const restartGatewayProcessWithFreshPid = vi.fn< + () => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string } +>(() => ({ mode: "disabled" })); const DRAIN_TIMEOUT_LOG = "drain timeout reached; proceeding with restart"; const gatewayLog = { info: vi.fn(), @@ -30,8 +32,7 @@ vi.mock("../../infra/restart.js", () => ({ })); vi.mock("../../infra/process-respawn.js", () => ({ - restartGatewayProcessWithFreshPid: (...args: unknown[]) => - restartGatewayProcessWithFreshPid(...args), + restartGatewayProcessWithFreshPid: () => restartGatewayProcessWithFreshPid(), })); vi.mock("../../process/command-queue.js", () => ({ @@ -140,6 +141,7 @@ describe("runGatewayLoop", () => { }); expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2); expect(resetAllLanes).toHaveBeenCalledTimes(2); + expect(acquireGatewayLock).toHaveBeenCalledTimes(3); } finally { removeNewSignalListeners("SIGTERM", beforeSigterm); removeNewSignalListeners("SIGINT", beforeSigint); @@ -153,8 +155,6 @@ describe("runGatewayLoop", () => { const lockRelease = vi.fn(async () => {}); acquireGatewayLock.mockResolvedValueOnce({ release: lockRelease, - lockPath: "/tmp/test.lock", - configPath: "/test/openclaw.json", }); // Override process-respawn to return "spawned" mode diff --git a/src/cli/gateway-cli/run-loop.ts b/src/cli/gateway-cli/run-loop.ts index a46017431..6c1eab6fb 100644 --- a/src/cli/gateway-cli/run-loop.ts +++ b/src/cli/gateway-cli/run-loop.ts @@ -23,7 +23,7 @@ export async function runGatewayLoop(params: { start: () => Promise>>; runtime: typeof defaultRuntime; }) { - const lock = await acquireGatewayLock(); + let lock = await acquireGatewayLock(); let server: Awaited> | null = null; let shuttingDown = false; let restartResolver: (() => void) | null = null; @@ -83,8 +83,12 @@ export async function runGatewayLoop(params: { clearTimeout(forceExitTimer); server = null; if (isRestart) { + const hadLock = lock != null; // Release the lock BEFORE spawning so the child can acquire it immediately. - await lock?.release(); + if (lock) { + await lock.release(); + lock = null; + } const respawn = restartGatewayProcessWithFreshPid(); if (respawn.mode === "spawned" || respawn.mode === "supervised") { const modeLabel = @@ -102,11 +106,29 @@ export async function runGatewayLoop(params: { } else { gatewayLog.info("restart mode: in-process restart (OPENCLAW_NO_RESPAWN)"); } - shuttingDown = false; - restartResolver?.(); + let canContinueInProcessRestart = true; + if (hadLock) { + try { + lock = await acquireGatewayLock(); + } catch (err) { + gatewayLog.error( + `failed to reacquire gateway lock for in-process restart: ${String(err)}`, + ); + cleanupSignals(); + params.runtime.exit(1); + canContinueInProcessRestart = false; + } + } + if (canContinueInProcessRestart) { + shuttingDown = false; + restartResolver?.(); + } } } else { - await lock?.release(); + if (lock) { + await lock.release(); + lock = null; + } cleanupSignals(); params.runtime.exit(0); } @@ -161,7 +183,10 @@ export async function runGatewayLoop(params: { }); } } finally { - await lock?.release(); + if (lock) { + await lock.release(); + lock = null; + } cleanupSignals(); } } diff --git a/src/infra/infra-parsing.test.ts b/src/infra/infra-parsing.test.ts index e9ba7f6d6..2aa613834 100644 --- a/src/infra/infra-parsing.test.ts +++ b/src/infra/infra-parsing.test.ts @@ -56,6 +56,17 @@ describe("infra parsing", () => { ).toBe(true); }); + it("returns true for dist/entry.js when launched via openclaw.mjs wrapper", () => { + expect( + isMainModule({ + currentFile: "/repo/dist/entry.js", + argv: ["node", "/repo/openclaw.mjs"], + cwd: "/repo", + env: {}, + }), + ).toBe(true); + }); + it("returns false when running under PM2 but this module is imported", () => { expect( isMainModule({ diff --git a/src/infra/is-main.ts b/src/infra/is-main.ts index 23c036cc3..cc3070f62 100644 --- a/src/infra/is-main.ts +++ b/src/infra/is-main.ts @@ -41,6 +41,16 @@ export function isMainModule({ return true; } + // The published/open-source wrapper binary is openclaw.mjs, which then imports + // dist/entry.js. Treat that pair as the main module so entry bootstrap runs. + if (normalizedCurrent && normalizedArgv1) { + const currentBase = path.basename(normalizedCurrent); + const argvBase = path.basename(normalizedArgv1); + if (currentBase === "entry.js" && (argvBase === "openclaw.mjs" || argvBase === "openclaw.js")) { + return true; + } + } + // Fallback: basename match (relative paths, symlinked bins). if ( normalizedCurrent && diff --git a/src/shared/pid-alive.test.ts b/src/shared/pid-alive.test.ts index 70249a961..862101bb7 100644 --- a/src/shared/pid-alive.test.ts +++ b/src/shared/pid-alive.test.ts @@ -31,8 +31,14 @@ describe("isPidAlive", () => { }); // Override platform to linux so the zombie check runs - const originalPlatform = process.platform; - Object.defineProperty(process, "platform", { value: "linux", writable: true }); + const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform"); + if (!originalPlatformDescriptor) { + throw new Error("missing process.platform descriptor"); + } + Object.defineProperty(process, "platform", { + ...originalPlatformDescriptor, + value: "linux", + }); try { // Re-import the module so it picks up the mocked platform and fs @@ -40,7 +46,7 @@ describe("isPidAlive", () => { const { isPidAlive: freshIsPidAlive } = await import("./pid-alive.js"); expect(freshIsPidAlive(zombiePid)).toBe(false); } finally { - Object.defineProperty(process, "platform", { value: originalPlatform, writable: true }); + Object.defineProperty(process, "platform", originalPlatformDescriptor); vi.restoreAllMocks(); } });