fix: tighten gateway restart loop handling (#23416) (thanks @jeffwnli)

This commit is contained in:
Peter Steinberger
2026-02-22 10:36:11 +01:00
parent 26acb77450
commit dd07c06d00
6 changed files with 67 additions and 14 deletions

View File

@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
### Fixes
- Gateway/Restart: fix restart-loop edge cases by keeping `openclaw.mjs -> dist/entry.js` bootstrap detection explicit, reacquiring the gateway lock for in-process restart fallback paths, and tightening restart-loop regression coverage. (#23416) Thanks @jeffwnli.
- Security/Audit: add `openclaw security audit` detection for open group policies that expose runtime/filesystem tools without sandbox/workspace guards (`security.exposure.open_groups_with_runtime_or_fs`).
- Security/Exec env: block request-scoped `HOME` and `ZDOTDIR` overrides in host exec env sanitizers (Node + macOS), preventing shell startup-file execution before allowlist-evaluated command bodies. This ships in the next npm release. Thanks @tdjackey for reporting.
- Security/Gateway: emit a startup security warning when insecure/dangerous config flags are enabled (including `gateway.controlUi.dangerouslyDisableDeviceAuth=true`) and point operators to `openclaw security audit`.

View File

@@ -11,7 +11,9 @@ const markGatewaySigusr1RestartHandled = vi.fn();
const getActiveTaskCount = vi.fn(() => 0);
const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => ({ drained: true }));
const resetAllLanes = vi.fn();
const restartGatewayProcessWithFreshPid = vi.fn(() => ({ mode: "skipped" as const }));
const restartGatewayProcessWithFreshPid = vi.fn<
() => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string }
>(() => ({ mode: "disabled" }));
const DRAIN_TIMEOUT_LOG = "drain timeout reached; proceeding with restart";
const gatewayLog = {
info: vi.fn(),
@@ -30,8 +32,7 @@ vi.mock("../../infra/restart.js", () => ({
}));
vi.mock("../../infra/process-respawn.js", () => ({
restartGatewayProcessWithFreshPid: (...args: unknown[]) =>
restartGatewayProcessWithFreshPid(...args),
restartGatewayProcessWithFreshPid: () => restartGatewayProcessWithFreshPid(),
}));
vi.mock("../../process/command-queue.js", () => ({
@@ -140,6 +141,7 @@ describe("runGatewayLoop", () => {
});
expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2);
expect(resetAllLanes).toHaveBeenCalledTimes(2);
expect(acquireGatewayLock).toHaveBeenCalledTimes(3);
} finally {
removeNewSignalListeners("SIGTERM", beforeSigterm);
removeNewSignalListeners("SIGINT", beforeSigint);
@@ -153,8 +155,6 @@ describe("runGatewayLoop", () => {
const lockRelease = vi.fn(async () => {});
acquireGatewayLock.mockResolvedValueOnce({
release: lockRelease,
lockPath: "/tmp/test.lock",
configPath: "/test/openclaw.json",
});
// Override process-respawn to return "spawned" mode

View File

@@ -23,7 +23,7 @@ export async function runGatewayLoop(params: {
start: () => Promise<Awaited<ReturnType<typeof startGatewayServer>>>;
runtime: typeof defaultRuntime;
}) {
const lock = await acquireGatewayLock();
let lock = await acquireGatewayLock();
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
let shuttingDown = false;
let restartResolver: (() => void) | null = null;
@@ -83,8 +83,12 @@ export async function runGatewayLoop(params: {
clearTimeout(forceExitTimer);
server = null;
if (isRestart) {
const hadLock = lock != null;
// Release the lock BEFORE spawning so the child can acquire it immediately.
await lock?.release();
if (lock) {
await lock.release();
lock = null;
}
const respawn = restartGatewayProcessWithFreshPid();
if (respawn.mode === "spawned" || respawn.mode === "supervised") {
const modeLabel =
@@ -102,11 +106,29 @@ export async function runGatewayLoop(params: {
} else {
gatewayLog.info("restart mode: in-process restart (OPENCLAW_NO_RESPAWN)");
}
shuttingDown = false;
restartResolver?.();
let canContinueInProcessRestart = true;
if (hadLock) {
try {
lock = await acquireGatewayLock();
} catch (err) {
gatewayLog.error(
`failed to reacquire gateway lock for in-process restart: ${String(err)}`,
);
cleanupSignals();
params.runtime.exit(1);
canContinueInProcessRestart = false;
}
}
if (canContinueInProcessRestart) {
shuttingDown = false;
restartResolver?.();
}
}
} else {
await lock?.release();
if (lock) {
await lock.release();
lock = null;
}
cleanupSignals();
params.runtime.exit(0);
}
@@ -161,7 +183,10 @@ export async function runGatewayLoop(params: {
});
}
} finally {
await lock?.release();
if (lock) {
await lock.release();
lock = null;
}
cleanupSignals();
}
}

View File

@@ -56,6 +56,17 @@ describe("infra parsing", () => {
).toBe(true);
});
it("returns true for dist/entry.js when launched via openclaw.mjs wrapper", () => {
  // argv[1] names the wrapper script while the module under test is dist/entry.js;
  // that pairing must still be recognised as the main module.
  const wrapperLaunch = {
    currentFile: "/repo/dist/entry.js",
    argv: ["node", "/repo/openclaw.mjs"],
    cwd: "/repo",
    env: {},
  };
  const detectedAsMain = isMainModule(wrapperLaunch);
  expect(detectedAsMain).toBe(true);
});
it("returns false when running under PM2 but this module is imported", () => {
expect(
isMainModule({

View File

@@ -41,6 +41,16 @@ export function isMainModule({
return true;
}
// The published/open-source wrapper binary is openclaw.mjs, which then imports
// dist/entry.js. Treat that pair as the main module so entry bootstrap runs.
// The check compares basenames only and also accepts a wrapper named openclaw.js.
if (normalizedCurrent && normalizedArgv1) {
const currentBase = path.basename(normalizedCurrent);
const argvBase = path.basename(normalizedArgv1);
if (currentBase === "entry.js" && (argvBase === "openclaw.mjs" || argvBase === "openclaw.js")) {
return true;
}
}
// Fallback: basename match (relative paths, symlinked bins).
if (
normalizedCurrent &&

View File

@@ -31,8 +31,14 @@ describe("isPidAlive", () => {
});
// Override platform to linux so the zombie check runs
const originalPlatform = process.platform;
Object.defineProperty(process, "platform", { value: "linux", writable: true });
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
if (!originalPlatformDescriptor) {
throw new Error("missing process.platform descriptor");
}
Object.defineProperty(process, "platform", {
...originalPlatformDescriptor,
value: "linux",
});
try {
// Re-import the module so it picks up the mocked platform and fs
@@ -40,7 +46,7 @@ describe("isPidAlive", () => {
const { isPidAlive: freshIsPidAlive } = await import("./pid-alive.js");
expect(freshIsPidAlive(zombiePid)).toBe(false);
} finally {
Object.defineProperty(process, "platform", { value: originalPlatform, writable: true });
Object.defineProperty(process, "platform", originalPlatformDescriptor);
vi.restoreAllMocks();
}
});