fix: tighten gateway restart loop handling (#23416) (thanks @jeffwnli)
This commit is contained in:
@@ -18,6 +18,7 @@ Docs: https://docs.openclaw.ai
|
||||
|
||||
### Fixes
|
||||
|
||||
- Gateway/Restart: fix restart-loop edge cases by keeping `openclaw.mjs -> dist/entry.js` bootstrap detection explicit, reacquiring the gateway lock for in-process restart fallback paths, and tightening restart-loop regression coverage. (#23416) Thanks @jeffwnli.
|
||||
- Security/Audit: add `openclaw security audit` detection for open group policies that expose runtime/filesystem tools without sandbox/workspace guards (`security.exposure.open_groups_with_runtime_or_fs`).
|
||||
- Security/Exec env: block request-scoped `HOME` and `ZDOTDIR` overrides in host exec env sanitizers (Node + macOS), preventing shell startup-file execution before allowlist-evaluated command bodies. This ships in the next npm release. Thanks @tdjackey for reporting.
|
||||
- Security/Gateway: emit a startup security warning when insecure/dangerous config flags are enabled (including `gateway.controlUi.dangerouslyDisableDeviceAuth=true`) and point operators to `openclaw security audit`.
|
||||
|
||||
@@ -11,7 +11,9 @@ const markGatewaySigusr1RestartHandled = vi.fn();
|
||||
const getActiveTaskCount = vi.fn(() => 0);
|
||||
const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => ({ drained: true }));
|
||||
const resetAllLanes = vi.fn();
|
||||
const restartGatewayProcessWithFreshPid = vi.fn(() => ({ mode: "skipped" as const }));
|
||||
const restartGatewayProcessWithFreshPid = vi.fn<
|
||||
() => { mode: "spawned" | "supervised" | "disabled" | "failed"; pid?: number; detail?: string }
|
||||
>(() => ({ mode: "disabled" }));
|
||||
const DRAIN_TIMEOUT_LOG = "drain timeout reached; proceeding with restart";
|
||||
const gatewayLog = {
|
||||
info: vi.fn(),
|
||||
@@ -30,8 +32,7 @@ vi.mock("../../infra/restart.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("../../infra/process-respawn.js", () => ({
|
||||
restartGatewayProcessWithFreshPid: (...args: unknown[]) =>
|
||||
restartGatewayProcessWithFreshPid(...args),
|
||||
restartGatewayProcessWithFreshPid: () => restartGatewayProcessWithFreshPid(),
|
||||
}));
|
||||
|
||||
vi.mock("../../process/command-queue.js", () => ({
|
||||
@@ -140,6 +141,7 @@ describe("runGatewayLoop", () => {
|
||||
});
|
||||
expect(markGatewaySigusr1RestartHandled).toHaveBeenCalledTimes(2);
|
||||
expect(resetAllLanes).toHaveBeenCalledTimes(2);
|
||||
expect(acquireGatewayLock).toHaveBeenCalledTimes(3);
|
||||
} finally {
|
||||
removeNewSignalListeners("SIGTERM", beforeSigterm);
|
||||
removeNewSignalListeners("SIGINT", beforeSigint);
|
||||
@@ -153,8 +155,6 @@ describe("runGatewayLoop", () => {
|
||||
const lockRelease = vi.fn(async () => {});
|
||||
acquireGatewayLock.mockResolvedValueOnce({
|
||||
release: lockRelease,
|
||||
lockPath: "/tmp/test.lock",
|
||||
configPath: "/test/openclaw.json",
|
||||
});
|
||||
|
||||
// Override process-respawn to return "spawned" mode
|
||||
|
||||
@@ -23,7 +23,7 @@ export async function runGatewayLoop(params: {
|
||||
start: () => Promise<Awaited<ReturnType<typeof startGatewayServer>>>;
|
||||
runtime: typeof defaultRuntime;
|
||||
}) {
|
||||
const lock = await acquireGatewayLock();
|
||||
let lock = await acquireGatewayLock();
|
||||
let server: Awaited<ReturnType<typeof startGatewayServer>> | null = null;
|
||||
let shuttingDown = false;
|
||||
let restartResolver: (() => void) | null = null;
|
||||
@@ -83,8 +83,12 @@ export async function runGatewayLoop(params: {
|
||||
clearTimeout(forceExitTimer);
|
||||
server = null;
|
||||
if (isRestart) {
|
||||
const hadLock = lock != null;
|
||||
// Release the lock BEFORE spawning so the child can acquire it immediately.
|
||||
await lock?.release();
|
||||
if (lock) {
|
||||
await lock.release();
|
||||
lock = null;
|
||||
}
|
||||
const respawn = restartGatewayProcessWithFreshPid();
|
||||
if (respawn.mode === "spawned" || respawn.mode === "supervised") {
|
||||
const modeLabel =
|
||||
@@ -102,11 +106,29 @@ export async function runGatewayLoop(params: {
|
||||
} else {
|
||||
gatewayLog.info("restart mode: in-process restart (OPENCLAW_NO_RESPAWN)");
|
||||
}
|
||||
shuttingDown = false;
|
||||
restartResolver?.();
|
||||
let canContinueInProcessRestart = true;
|
||||
if (hadLock) {
|
||||
try {
|
||||
lock = await acquireGatewayLock();
|
||||
} catch (err) {
|
||||
gatewayLog.error(
|
||||
`failed to reacquire gateway lock for in-process restart: ${String(err)}`,
|
||||
);
|
||||
cleanupSignals();
|
||||
params.runtime.exit(1);
|
||||
canContinueInProcessRestart = false;
|
||||
}
|
||||
}
|
||||
if (canContinueInProcessRestart) {
|
||||
shuttingDown = false;
|
||||
restartResolver?.();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
await lock?.release();
|
||||
if (lock) {
|
||||
await lock.release();
|
||||
lock = null;
|
||||
}
|
||||
cleanupSignals();
|
||||
params.runtime.exit(0);
|
||||
}
|
||||
@@ -161,7 +183,10 @@ export async function runGatewayLoop(params: {
|
||||
});
|
||||
}
|
||||
} finally {
|
||||
await lock?.release();
|
||||
if (lock) {
|
||||
await lock.release();
|
||||
lock = null;
|
||||
}
|
||||
cleanupSignals();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -56,6 +56,17 @@ describe("infra parsing", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true for dist/entry.js when launched via openclaw.mjs wrapper", () => {
|
||||
expect(
|
||||
isMainModule({
|
||||
currentFile: "/repo/dist/entry.js",
|
||||
argv: ["node", "/repo/openclaw.mjs"],
|
||||
cwd: "/repo",
|
||||
env: {},
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("returns false when running under PM2 but this module is imported", () => {
|
||||
expect(
|
||||
isMainModule({
|
||||
|
||||
@@ -41,6 +41,16 @@ export function isMainModule({
|
||||
return true;
|
||||
}
|
||||
|
||||
// The published/open-source wrapper binary is openclaw.mjs, which then imports
|
||||
// dist/entry.js. Treat that pair as the main module so entry bootstrap runs.
|
||||
if (normalizedCurrent && normalizedArgv1) {
|
||||
const currentBase = path.basename(normalizedCurrent);
|
||||
const argvBase = path.basename(normalizedArgv1);
|
||||
if (currentBase === "entry.js" && (argvBase === "openclaw.mjs" || argvBase === "openclaw.js")) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback: basename match (relative paths, symlinked bins).
|
||||
if (
|
||||
normalizedCurrent &&
|
||||
|
||||
@@ -31,8 +31,14 @@ describe("isPidAlive", () => {
|
||||
});
|
||||
|
||||
// Override platform to linux so the zombie check runs
|
||||
const originalPlatform = process.platform;
|
||||
Object.defineProperty(process, "platform", { value: "linux", writable: true });
|
||||
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, "platform");
|
||||
if (!originalPlatformDescriptor) {
|
||||
throw new Error("missing process.platform descriptor");
|
||||
}
|
||||
Object.defineProperty(process, "platform", {
|
||||
...originalPlatformDescriptor,
|
||||
value: "linux",
|
||||
});
|
||||
|
||||
try {
|
||||
// Re-import the module so it picks up the mocked platform and fs
|
||||
@@ -40,7 +46,7 @@ describe("isPidAlive", () => {
|
||||
const { isPidAlive: freshIsPidAlive } = await import("./pid-alive.js");
|
||||
expect(freshIsPidAlive(zombiePid)).toBe(false);
|
||||
} finally {
|
||||
Object.defineProperty(process, "platform", { value: originalPlatform, writable: true });
|
||||
Object.defineProperty(process, "platform", originalPlatformDescriptor);
|
||||
vi.restoreAllMocks();
|
||||
}
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user