fix: verify gateway restart health after daemon restart
This commit is contained in:
@@ -1,13 +1,38 @@
|
||||
import { loadConfig, resolveGatewayPort } from "../../config/config.js";
|
||||
import { resolveGatewayService } from "../../daemon/service.js";
|
||||
import { defaultRuntime } from "../../runtime.js";
|
||||
import { theme } from "../../terminal/theme.js";
|
||||
import { formatCliCommand } from "../command-format.js";
|
||||
import {
|
||||
runServiceRestart,
|
||||
runServiceStart,
|
||||
runServiceStop,
|
||||
runServiceUninstall,
|
||||
} from "./lifecycle-core.js";
|
||||
import { renderGatewayServiceStartHints } from "./shared.js";
|
||||
import {
|
||||
renderRestartDiagnostics,
|
||||
terminateStaleGatewayPids,
|
||||
waitForGatewayHealthyRestart,
|
||||
} from "./restart-health.js";
|
||||
import { parsePortFromArgs, renderGatewayServiceStartHints } from "./shared.js";
|
||||
import type { DaemonLifecycleOptions } from "./types.js";
|
||||
|
||||
// Passed as `attempts` to waitForGatewayHealthyRestart after a restart.
const POST_RESTART_HEALTH_ATTEMPTS = 8;
// Passed as `delayMs` to waitForGatewayHealthyRestart (value is in milliseconds).
const POST_RESTART_HEALTH_DELAY_MS = 450;
|
||||
|
||||
/**
 * Resolves the port used for the post-restart gateway health check.
 *
 * A port parsed from the service command's program arguments takes
 * precedence. Otherwise the port is resolved from the loaded config and an
 * environment in which the service command's variables override `process.env`.
 */
async function resolveGatewayRestartPort() {
  const service = resolveGatewayService();
  // Best effort: if the service command cannot be read, continue without it
  // and fall through to config/env based resolution.
  const command = await service.readCommand(process.env).catch(() => null);

  // Later spreads win, so service-level variables shadow the current process
  // environment. Spreading null/undefined is a no-op.
  const mergedEnv = {
    ...process.env,
    ...command?.environment,
  } as NodeJS.ProcessEnv;

  const portFromArgs = parsePortFromArgs(command?.programArguments);
  return portFromArgs ?? resolveGatewayPort(loadConfig(), mergedEnv);
}
|
||||
|
||||
export async function runDaemonUninstall(opts: DaemonLifecycleOptions = {}) {
|
||||
return await runServiceUninstall({
|
||||
serviceNoun: "Gateway",
|
||||
@@ -41,11 +66,62 @@ export async function runDaemonStop(opts: DaemonLifecycleOptions = {}) {
|
||||
* Throws/exits on check or restart failures.
|
||||
*/
|
||||
export async function runDaemonRestart(opts: DaemonLifecycleOptions = {}): Promise<boolean> {
  const json = Boolean(opts.json);
  // Resolve the service once so the restart and the health checks share it.
  const service = resolveGatewayService();
  // If the service command cannot be inspected, fall back to the port derived
  // from config and the current process environment.
  const restartPort = await resolveGatewayRestartPort().catch(() =>
    resolveGatewayPort(loadConfig(), process.env),
  );

  // The initial probe and the retry after stale-process cleanup use the same
  // parameters, so they share one helper.
  const waitForHealthyGateway = () =>
    waitForGatewayHealthyRestart({
      service,
      port: restartPort,
      attempts: POST_RESTART_HEALTH_ATTEMPTS,
      delayMs: POST_RESTART_HEALTH_DELAY_MS,
    });

  return await runServiceRestart({
    serviceNoun: "Gateway",
    service,
    renderStartHints: renderGatewayServiceStartHints,
    opts,
    checkTokenDrift: true,
    postRestartCheck: async ({ warnings, fail, stdout }) => {
      let health = await waitForHealthyGateway();

      // Unhealthy with stale gateway PIDs reported: stop them, restart once
      // more and probe again.
      if (!health.healthy && health.staleGatewayPids.length > 0) {
        const staleMsg = `Found stale gateway process(es): ${health.staleGatewayPids.join(", ")}.`;
        warnings.push(staleMsg);
        if (!json) {
          defaultRuntime.log(theme.warn(staleMsg));
          defaultRuntime.log(theme.muted("Stopping stale process(es) and retrying restart..."));
        }

        await terminateStaleGatewayPids(health.staleGatewayPids);
        await service.restart({ env: process.env, stdout });
        health = await waitForHealthyGateway();
      }

      if (health.healthy) {
        return;
      }

      // Still unhealthy: log diagnostics in human-readable mode, or append
      // them to `warnings` in JSON mode.
      const diagnostics = renderRestartDiagnostics(health);
      if (!json) {
        defaultRuntime.log(theme.warn("Gateway did not become healthy after restart."));
        for (const line of diagnostics) {
          defaultRuntime.log(theme.muted(line));
        }
      } else {
        warnings.push(...diagnostics);
      }

      fail("Gateway restart failed health checks.", [
        formatCliCommand("openclaw gateway status --probe --deep"),
        formatCliCommand("openclaw doctor"),
      ]);
    },
  });
}
|
||||
|
||||
Reference in New Issue
Block a user