diff --git a/src/infra/restart.test.ts b/src/infra/restart.test.ts new file mode 100644 index 000000000..cc858933a --- /dev/null +++ b/src/infra/restart.test.ts @@ -0,0 +1,19 @@ +import { describe, expect, it } from "vitest"; +import { findGatewayPidsOnPortSync } from "./restart.js"; + +describe("findGatewayPidsOnPortSync", () => { + it("returns an empty array for a port with no listeners", () => { + const pids = findGatewayPidsOnPortSync(19999); + expect(pids).toEqual([]); + }); + + it("never includes the current process PID", () => { + const pids = findGatewayPidsOnPortSync(18789); + expect(pids).not.toContain(process.pid); + }); + + it("returns an array (not undefined or null) on any port", () => { + const pids = findGatewayPidsOnPortSync(0); + expect(Array.isArray(pids)).toBe(true); + }); +}); diff --git a/src/infra/restart.ts b/src/infra/restart.ts index 4dd09beaa..35cad0717 100644 --- a/src/infra/restart.ts +++ b/src/infra/restart.ts @@ -1,9 +1,11 @@ import { spawnSync } from "node:child_process"; +import { resolveGatewayPort } from "../config/paths.js"; import { resolveGatewayLaunchAgentLabel, resolveGatewaySystemdServiceName, } from "../daemon/constants.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; +import { resolveLsofCommandSync } from "./ports-lsof.js"; export type RestartAttempt = { ok: boolean; @@ -283,10 +285,106 @@ function normalizeSystemdUnit(raw?: string, profile?: string): string { return unit.endsWith(".service") ? unit : `${unit}.service`; } +/** + * Find PIDs of gateway processes listening on the given port using synchronous lsof. + * Returns only PIDs that belong to openclaw gateway processes (not the current process). 
/**
 * Find PIDs of openclaw gateway processes listening on `port`, using a synchronous
 * `lsof` call.
 *
 * `lsof` is run with `-Fpc`, whose output carries one field per line: a `p<pid>` line
 * opens a process record and a `c<command>` line carries that process's command name.
 * A PID is reported only when its command name contains "openclaw" (case-insensitive)
 * and it is not the current process.
 *
 * @param port - TCP port to inspect.
 * @returns Matching PIDs in lsof output order, without duplicates. Always an array:
 *   empty on Windows, for a port outside 1-65535, and when `lsof` cannot be run or
 *   exits with a non-zero status.
 */
export function findGatewayPidsOnPortSync(port: number): number[] {
  if (process.platform === "win32") {
    return [];
  }
  // Nothing can be listening on NaN, a fractional port, or a port outside 1-65535,
  // so skip the lsof spawn entirely for such values.
  if (!Number.isInteger(port) || port < 1 || port > 65535) {
    return [];
  }
  const lsof = resolveLsofCommandSync();
  const res = spawnSync(lsof, ["-nP", `-iTCP:${port}`, "-sTCP:LISTEN", "-Fpc"], {
    encoding: "utf8",
    timeout: SPAWN_TIMEOUT_MS,
  });
  if (res.error || res.status !== 0) {
    return [];
  }
  const pids = new Set<number>();
  let currentPid: number | undefined;
  for (const line of res.stdout.split(/\r?\n/)) {
    const tag = line.charAt(0);
    const value = line.slice(1);
    if (tag === "p") {
      // Accept purely numeric PIDs only; parseInt alone would also accept "12abc".
      const parsed = /^\d+$/.test(value) ? Number.parseInt(value, 10) : 0;
      currentPid = parsed > 0 ? parsed : undefined;
    } else if (
      tag === "c" &&
      currentPid !== undefined &&
      value.toLowerCase().includes("openclaw")
    ) {
      pids.add(currentPid);
    }
  }
  // Never report ourselves as a stale gateway.
  pids.delete(process.pid);
  return [...pids];
}

/** Grace period (ms) granted after SIGTERM before survivors are sent SIGKILL. */
const STALE_SIGTERM_WAIT_MS = 300;
/** Pause (ms) after SIGKILL before the caller carries on. */
const STALE_SIGKILL_WAIT_MS = 200;

/**
 * Synchronously terminate stale gateway processes.
 * Sends SIGTERM, waits briefly, then SIGKILL for survivors.
/**
 * Synchronously terminate stale gateway processes.
 *
 * Sends SIGTERM to every target, blocks for STALE_SIGTERM_WAIT_MS, then sends SIGKILL
 * to any target that still exists. If at least one SIGKILL was sent it blocks for a
 * further STALE_SIGKILL_WAIT_MS before returning.
 *
 * @param pids - Candidate process IDs. Duplicates, non-integer values, values <= 0 and
 *   the current process's own PID are skipped.
 * @returns The PIDs that were successfully sent SIGTERM (empty when none could be
 *   signalled).
 */
function terminateStaleProcessesSync(pids: number[]): number[] {
  // Block the current thread without depending on an external `sleep` binary: the
  // wait on a private, never-notified buffer simply times out after `ms`.
  const pause = (ms: number): void => {
    Atomics.wait(new Int32Array(new SharedArrayBuffer(4)), 0, 0, ms);
  };

  const signalled: number[] = [];
  for (const pid of new Set(pids)) {
    // kill(2) treats 0 and negative PIDs as process-group / broadcast targets, and
    // signalling our own PID would abort the restart in progress — never do either.
    if (!Number.isInteger(pid) || pid <= 0 || pid === process.pid) {
      continue;
    }
    try {
      process.kill(pid, "SIGTERM");
      signalled.push(pid);
    } catch {
      // Could not be signalled (e.g. ESRCH — already gone); nothing to wait for.
    }
  }
  if (signalled.length === 0) {
    return signalled;
  }

  pause(STALE_SIGTERM_WAIT_MS);

  let escalated = false;
  for (const pid of signalled) {
    try {
      // Signal 0 is an existence probe: it throws when the process is already gone,
      // which skips the SIGKILL below.
      process.kill(pid, 0);
      process.kill(pid, "SIGKILL");
      escalated = true;
    } catch {
      // already gone
    }
  }
  // Only wait again when something was actually sent SIGKILL.
  if (escalated) {
    pause(STALE_SIGKILL_WAIT_MS);
  }
  return signalled;
}

/**
 * Inspect the gateway port and kill any stale gateway processes holding it.
 * Called by triggerOpenClawRestart before it attempts a restart, to prevent port
 * conflicts.
 *
 * Best-effort: this function never throws. A failure is logged as a warning and
 * reported as "nothing killed".
 *
 * @returns The PIDs that were sent SIGTERM; empty when no stale gateway was found or
 *   the cleanup failed.
 */
function cleanStaleGatewayProcessesSync(): number[] {
  try {
    const port = resolveGatewayPort(undefined, process.env);
    const stalePids = findGatewayPidsOnPortSync(port);
    if (stalePids.length === 0) {
      return [];
    }
    restartLog.warn(
      `killing ${stalePids.length} stale gateway process(es) before restart: ${stalePids.join(", ")}`,
    );
    return terminateStaleProcessesSync(stalePids);
  } catch (err) {
    try {
      const reason = err instanceof Error ? err.message : String(err);
      restartLog.warn(`stale gateway cleanup failed: ${reason}`);
    } catch {
      // A logging failure must not block the restart either.
    }
    return [];
  }
}

export function triggerOpenClawRestart(): RestartAttempt {
  if (process.env.VITEST || process.env.NODE_ENV === "test") {
    return { ok: true, method: "supervisor", detail: "test mode" };
  }

  cleanStaleGatewayProcessesSync();

  const tried: string[] = [];
  if (process.platform !== "darwin") {
    if (process.platform === "linux") {