fix: release gateway lock before process.exit in run-loop

process.exit() called from inside an async IIFE bypasses the outer
try/finally block that releases the gateway lock. This leaves a stale
lock file pointing to a zombie PID, preventing the spawned child or
systemctl restart from acquiring the lock. Release the lock explicitly
before calling exit in both the restart-spawned and stop code paths.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
jeffr
2026-02-21 23:14:41 -08:00
committed by Peter Steinberger
parent 6eaf2baa57
commit 01bd83d644
2 changed files with 82 additions and 1 deletions

View File

@@ -11,6 +11,7 @@ const markGatewaySigusr1RestartHandled = vi.fn();
const getActiveTaskCount = vi.fn(() => 0);
const waitForActiveTasks = vi.fn(async (_timeoutMs: number) => ({ drained: true }));
const resetAllLanes = vi.fn();
const restartGatewayProcessWithFreshPid = vi.fn(() => ({ mode: "skipped" as const }));
const DRAIN_TIMEOUT_LOG = "drain timeout reached; proceeding with restart";
const gatewayLog = {
info: vi.fn(),
@@ -29,7 +30,8 @@ vi.mock("../../infra/restart.js", () => ({
}));
vi.mock("../../infra/process-respawn.js", () => ({
restartGatewayProcessWithFreshPid: () => ({ mode: "skipped" }),
restartGatewayProcessWithFreshPid: (...args: unknown[]) =>
restartGatewayProcessWithFreshPid(...args),
}));
vi.mock("../../process/command-queue.js", () => ({
@@ -144,6 +146,83 @@ describe("runGatewayLoop", () => {
removeNewSignalListeners("SIGUSR1", beforeSigusr1);
}
});
// Regression test: when a restart is handled by spawning a replacement process,
// the gateway lock's release() must be called before runtime.exit(0).
// The order is checked by having both mocks append to a shared array.
it("releases the lock before exiting on spawned restart", async () => {
// Drop call history left on the shared mocks by earlier tests.
vi.clearAllMocks();
const lockRelease = vi.fn(async () => {});
// Hand the next lock acquisition a lock object whose release() is our spy.
// NOTE(review): acquireGatewayLock is a mock declared outside this view.
acquireGatewayLock.mockResolvedValueOnce({
release: lockRelease,
lockPath: "/tmp/test.lock",
configPath: "/test/openclaw.json",
});
// Override process-respawn to return "spawned" mode
// (the mock's default return value is { mode: "skipped" }).
restartGatewayProcessWithFreshPid.mockReturnValueOnce({
mode: "spawned",
pid: 9999,
});
const close = vi.fn(async () => {});
// `started` resolves the first time the loop invokes start(), letting the
// test wait until the gateway has been started before sending a signal.
let resolveStarted: (() => void) | null = null;
const started = new Promise<void>((resolve) => {
resolveStarted = resolve;
});
const start = vi.fn(async () => {
resolveStarted?.();
return { close };
});
// Records the relative order of lock release and exit.
const exitCallOrder: string[] = [];
// Stub runtime: exit() only records the call, so the test process keeps running.
const runtime = {
log: vi.fn(),
error: vi.fn(),
exit: vi.fn(() => {
exitCallOrder.push("exit");
}),
};
// Replace the no-op release with one that logs into the shared order array.
lockRelease.mockImplementation(async () => {
exitCallOrder.push("lockRelease");
});
// Snapshot the signal listeners present before the loop starts; the finally
// block passes these snapshots to removeNewSignalListeners.
// NOTE(review): presumably that helper removes every listener not in the
// snapshot — it is defined outside this view, confirm there.
const beforeSigterm = new Set(
process.listeners("SIGTERM") as Array<(...args: unknown[]) => void>,
);
const beforeSigint = new Set(
process.listeners("SIGINT") as Array<(...args: unknown[]) => void>,
);
const beforeSigusr1 = new Set(
process.listeners("SIGUSR1") as Array<(...args: unknown[]) => void>,
);
// Reset the module registry so run-loop.js is imported fresh for this test.
vi.resetModules();
const { runGatewayLoop } = await import("./run-loop.js");
// The loop promise is deliberately not awaited; the test drives the loop
// through signals and inspects the mocks instead.
const _loopPromise = runGatewayLoop({
start: start as unknown as Parameters<typeof runGatewayLoop>[0]["start"],
runtime: runtime as unknown as Parameters<typeof runGatewayLoop>[0]["runtime"],
});
try {
await started;
// Yield one macrotask after start() has been called.
// NOTE(review): presumably gives the loop time to finish its post-start
// setup before the signal is delivered — confirm against run-loop.ts.
await new Promise<void>((resolve) => setImmediate(resolve));
// Deliver SIGUSR1 to the handlers registered on this process.
process.emit("SIGUSR1");
// Wait for the shutdown path to complete
// (fixed 100 ms delay; the assertions below depend on it being long enough).
await new Promise<void>((resolve) => setTimeout(resolve, 100));
expect(lockRelease).toHaveBeenCalled();
expect(runtime.exit).toHaveBeenCalledWith(0);
// Lock must be released BEFORE exit
expect(exitCallOrder).toEqual(["lockRelease", "exit"]);
} finally {
// Always restore the pre-test listener sets, even if an assertion failed.
removeNewSignalListeners("SIGTERM", beforeSigterm);
removeNewSignalListeners("SIGINT", beforeSigint);
removeNewSignalListeners("SIGUSR1", beforeSigusr1);
}
});
});
describe("gateway discover routing helpers", () => {

View File

@@ -90,6 +90,7 @@ export async function runGatewayLoop(params: {
? `spawned pid ${respawn.pid ?? "unknown"}`
: "supervisor restart";
gatewayLog.info(`restart mode: full process restart (${modeLabel})`);
await lock?.release();
cleanupSignals();
params.runtime.exit(0);
} else {
@@ -104,6 +105,7 @@ export async function runGatewayLoop(params: {
restartResolver?.();
}
} else {
await lock?.release();
cleanupSignals();
params.runtime.exit(0);
}