From 01641b34ea62b586cd2380bfe106c86b0d227e6f Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 8 Jan 2026 21:28:40 +0100 Subject: [PATCH] feat(doctor): audit supervisor config + docs --- CHANGELOG.md | 1 + docs/gateway/doctor.md | 13 +- docs/gateway/index.md | 8 ++ docs/platforms/linux.md | 6 +- src/cli/daemon-cli.ts | 18 +++ src/commands/doctor-gateway-services.ts | 90 +++++++++++++ src/commands/doctor.ts | 7 + src/daemon/service-audit.ts | 165 ++++++++++++++++++++++++ src/daemon/systemd.ts | 6 + 9 files changed, 310 insertions(+), 4 deletions(-) create mode 100644 src/daemon/service-audit.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 01b2c09af..61e63120e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Unreleased +- Doctor/Daemon: audit supervisor configs, recommend doctor from daemon status, and document user vs system services. (#?) — thanks @steipete - Daemon: align generated systemd unit with docs for network-online + restart delay. (#479) — thanks @azade-c - Outbound: default Telegram account selection for config-only tokens; remove heartbeat-specific accountId handling. (follow-up #516) — thanks @YuriNachos - Cron: allow Telegram delivery targets with topic/thread IDs (e.g. `-100…:topic:123`). (#474) — thanks @mitschabaude-bot diff --git a/docs/gateway/doctor.md b/docs/gateway/doctor.md index 0b10f8d56..2bfc221ab 100644 --- a/docs/gateway/doctor.md +++ b/docs/gateway/doctor.md @@ -51,6 +51,7 @@ cat ~/.clawdbot/clawdbot.json - Sandbox image repair when sandboxing is enabled. - Legacy service migration and extra gateway detection. - Gateway runtime checks (service installed but not running; cached launchd label). +- Supervisor config audit (launchd/systemd/schtasks) with optional repair. - Gateway port collision diagnostics (default `18789`). - Security warnings for open DM policies. - systemd linger check on Linux. @@ -143,17 +144,23 @@ workspace. Doctor runs a health check and offers to restart the gateway when it looks unhealthy. -### 11) Gateway runtime + port diagnostics +### 11) Supervisor config audit + repair +Doctor checks the installed supervisor config (launchd/systemd/schtasks) for +missing or outdated defaults (e.g., systemd network-online dependencies and +restart delay). When it finds a mismatch, it recommends an update and can +rewrite the service file/task to the current defaults. + +### 12) Gateway runtime + port diagnostics Doctor inspects the daemon runtime (PID, last exit status) and warns when the service is installed but not actually running. It also checks for port collisions on the gateway port (default `18789`) and reports likely causes (gateway already running, SSH tunnel). -### 12) Config write + wizard metadata +### 13) Config write + wizard metadata Doctor persists any config changes and stamps wizard metadata to record the doctor run. -### 13) Workspace tips (backup + memory system) +### 14) Workspace tips (backup + memory system) Doctor suggests a workspace memory system when missing and prints a backup tip if the workspace is not already under git. diff --git a/docs/gateway/index.md b/docs/gateway/index.md index f30ce3db6..9179b1c13 100644 --- a/docs/gateway/index.md +++ b/docs/gateway/index.md @@ -189,6 +189,14 @@ Bundled mac app: - `launchctl` only works if the LaunchAgent is installed; otherwise use `clawdbot daemon install` first. ## Supervision (systemd user unit) +Clawdbot installs a **systemd user service** by default on Linux/WSL2. We +recommend user services for single-user machines (simpler env, per-user config). +Use a **system service** for multi-user or always-on servers (no lingering +required, shared supervision). + +`clawdbot daemon install` writes the user unit. `clawdbot doctor` audits the +unit and can update it to match the current recommended defaults. + Create `~/.config/systemd/user/clawdbot-gateway.service`: ``` [Unit] diff --git a/docs/platforms/linux.md b/docs/platforms/linux.md index 819462199..1c4913983 100644 --- a/docs/platforms/linux.md +++ b/docs/platforms/linux.md @@ -54,7 +54,11 @@ clawdbot doctor ``` ## System control (systemd user unit) -Full unit example lives in the [Gateway runbook](/gateway). Minimal setup: +Clawdbot installs a systemd **user** service by default. Use a **system** +service for shared or always-on servers. The full unit example and guidance +live in the [Gateway runbook](/gateway). + +Minimal setup: Create `~/.config/systemd/user/clawdbot-gateway.service`: diff --git a/src/cli/daemon-cli.ts b/src/cli/daemon-cli.ts index d24ed3b08..f135fc868 100644 --- a/src/cli/daemon-cli.ts +++ b/src/cli/daemon-cli.ts @@ -33,6 +33,8 @@ import { resolveGatewayLogPaths } from "../daemon/launchd.js"; import { findLegacyGatewayServices } from "../daemon/legacy.js"; import { resolveGatewayProgramArguments } from "../daemon/program-args.js"; import { resolveGatewayService } from "../daemon/service.js"; +import type { ServiceConfigAudit } from "../daemon/service-audit.js"; +import { auditGatewayServiceConfig } from "../daemon/service-audit.js"; import { callGateway } from "../gateway/call.js"; import { resolveGatewayBindHost } from "../gateway/net.js"; import { @@ -89,6 +91,7 @@ type DaemonStatus = { cachedLabel?: boolean; missingUnit?: boolean; }; + configAudit?: ServiceConfigAudit; }; config?: { cli: ConfigSummary; @@ -343,6 +346,10 @@ async function gatherDaemonStatus(opts: { service.readCommand(process.env).catch(() => null), service.readRuntime(process.env).catch(() => undefined), ]); + const configAudit = await auditGatewayServiceConfig({ + env: process.env, + command, + }); const serviceEnv = command?.environment ?? undefined; const mergedDaemonEnv = { @@ -484,6 +491,7 @@ async function gatherDaemonStatus(opts: { notLoadedText: service.notLoadedText, command, runtime, + configAudit, }, config: { cli: cliConfigSummary, @@ -538,6 +546,16 @@ function printDaemonStatus(status: DaemonStatus, opts: { json: boolean }) { if (daemonEnvLines.length > 0) { defaultRuntime.log(`Daemon env: ${daemonEnvLines.join(" ")}`); } + if (service.configAudit?.issues.length) { + defaultRuntime.error( + "Service config looks out of date or non-standard.", + ); + for (const issue of service.configAudit.issues) { + const detail = issue.detail ? ` (${issue.detail})` : ""; + defaultRuntime.error(`Service config issue: ${issue.message}${detail}`); + } + defaultRuntime.error('Recommendation: run "clawdbot doctor".'); + } if (status.config) { const cliCfg = `${status.config.cli.path}${status.config.cli.exists ? "" : " (missing)"}${status.config.cli.valid ? "" : " (invalid)"}`; defaultRuntime.log(`Config (cli): ${cliCfg}`); diff --git a/src/commands/doctor-gateway-services.ts b/src/commands/doctor-gateway-services.ts index 113fd9b17..015fadb94 100644 --- a/src/commands/doctor-gateway-services.ts +++ b/src/commands/doctor-gateway-services.ts @@ -15,6 +15,7 @@ import { } from "../daemon/legacy.js"; import { resolveGatewayProgramArguments } from "../daemon/program-args.js"; import { resolveGatewayService } from "../daemon/service.js"; +import { auditGatewayServiceConfig } from "../daemon/service-audit.js"; import type { RuntimeEnv } from "../runtime.js"; import { DEFAULT_GATEWAY_DAEMON_RUNTIME, @@ -23,6 +24,18 @@ import { } from "./daemon-runtime.js"; import type { DoctorOptions, DoctorPrompter } from "./doctor-prompter.js"; +function detectGatewayRuntime( + programArguments: string[] | undefined, +): GatewayDaemonRuntime { + const first = programArguments?.[0]; + if (first) { + const base = path.basename(first).toLowerCase(); + if (base === "bun" || base === "bun.exe") return "bun"; + if (base === "node" || base === "node.exe") return "node"; + } + return DEFAULT_GATEWAY_DAEMON_RUNTIME; +} + export async function maybeMigrateLegacyGatewayService( cfg: ClawdbotConfig, mode: "local" | "remote", @@ -112,6 +125,83 @@ export async function maybeMigrateLegacyGatewayService( }); } +export async function maybeRepairGatewayServiceConfig( + cfg: ClawdbotConfig, + mode: "local" | "remote", + runtime: RuntimeEnv, + prompter: DoctorPrompter, +) { + if (resolveIsNixMode(process.env)) { + note("Nix mode detected; skip service updates.", "Gateway"); + return; + } + + if (mode === "remote") { + note("Gateway mode is remote; skipped local service audit.", "Gateway"); + return; + } + + const service = resolveGatewayService(); + const command = await service.readCommand(process.env).catch(() => null); + if (!command) return; + + const audit = await auditGatewayServiceConfig({ + env: process.env, + command, + }); + if (audit.issues.length === 0) return; + + note( + audit.issues + .map((issue) => + issue.detail ? `- ${issue.message} (${issue.detail})` : `- ${issue.message}`, + ) + .join("\n"), + "Gateway service config", + ); + + const repair = await prompter.confirmSkipInNonInteractive({ + message: "Update gateway service config to the recommended defaults now?", + initialValue: true, + }); + if (!repair) return; + + const devMode = + process.argv[1]?.includes(`${path.sep}src${path.sep}`) && + process.argv[1]?.endsWith(".ts"); + const port = resolveGatewayPort(cfg, process.env); + const runtimeChoice = detectGatewayRuntime(command.programArguments); + const { programArguments, workingDirectory } = + await resolveGatewayProgramArguments({ + port, + dev: devMode, + runtime: runtimeChoice, + }); + const environment: Record = { + PATH: process.env.PATH, + CLAWDBOT_PROFILE: process.env.CLAWDBOT_PROFILE, + CLAWDBOT_STATE_DIR: process.env.CLAWDBOT_STATE_DIR, + CLAWDBOT_CONFIG_PATH: process.env.CLAWDBOT_CONFIG_PATH, + CLAWDBOT_GATEWAY_PORT: String(port), + CLAWDBOT_GATEWAY_TOKEN: + cfg.gateway?.auth?.token ?? process.env.CLAWDBOT_GATEWAY_TOKEN, + CLAWDBOT_LAUNCHD_LABEL: + process.platform === "darwin" ? GATEWAY_LAUNCH_AGENT_LABEL : undefined, + }; + + try { + await service.install({ + env: process.env, + stdout: process.stdout, + programArguments, + workingDirectory, + environment, + }); + } catch (err) { + runtime.error(`Gateway service update failed: ${String(err)}`); + } +} + export async function maybeScanExtraGatewayServices(options: DoctorOptions) { const extraServices = await findExtraGatewayServices(process.env, { deep: options.deep, diff --git a/src/commands/doctor.ts b/src/commands/doctor.ts index 7c04e0a3d..d999d41e4 100644 --- a/src/commands/doctor.ts +++ b/src/commands/doctor.ts @@ -30,6 +30,7 @@ import { } from "./doctor-format.js"; import { maybeMigrateLegacyGatewayService, + maybeRepairGatewayServiceConfig, maybeScanExtraGatewayServices, } from "./doctor-gateway-services.js"; import { @@ -157,6 +158,12 @@ export async function doctorCommand( prompter, ); await maybeScanExtraGatewayServices(options); + await maybeRepairGatewayServiceConfig( + cfg, + resolveMode(cfg), + runtime, + prompter, + ); await noteSecurityWarnings(cfg); diff --git a/src/daemon/service-audit.ts b/src/daemon/service-audit.ts new file mode 100644 index 000000000..c8ae0c8b8 --- /dev/null +++ b/src/daemon/service-audit.ts @@ -0,0 +1,165 @@ +import fs from "node:fs/promises"; +import { resolveLaunchAgentPlistPath } from "./launchd.js"; +import { resolveSystemdUserUnitPath } from "./systemd.js"; + +export type GatewayServiceCommand = { + programArguments: string[]; + workingDirectory?: string; + environment?: Record; + sourcePath?: string; +} | null; + +export type ServiceConfigIssue = { + code: string; + message: string; + detail?: string; +}; + +export type ServiceConfigAudit = { + ok: boolean; + issues: ServiceConfigIssue[]; +}; + +function hasGatewaySubcommand(programArguments?: string[]): boolean { + return Boolean(programArguments?.some((arg) => arg === "gateway")); +} + +function parseSystemdUnit(content: string): { + after: Set; + wants: Set; + restartSec?: string; +} { + const after = new Set(); + const wants = new Set(); + let restartSec: string | undefined; + + for (const rawLine of content.split(/\r?\n/)) { + const line = rawLine.trim(); + if (!line) continue; + if (line.startsWith("#") || line.startsWith(";")) continue; + if (line.startsWith("[")) continue; + const idx = line.indexOf("="); + if (idx <= 0) continue; + const key = line.slice(0, idx).trim(); + const value = line.slice(idx + 1).trim(); + if (!value) continue; + if (key === "After") { + for (const entry of value.split(/\s+/)) { + if (entry) after.add(entry); + } + } else if (key === "Wants") { + for (const entry of value.split(/\s+/)) { + if (entry) wants.add(entry); + } + } else if (key === "RestartSec") { + restartSec = value; + } + } + + return { after, wants, restartSec }; +} + +function isRestartSecPreferred(value: string | undefined): boolean { + if (!value) return false; + const parsed = Number.parseFloat(value); + if (!Number.isFinite(parsed)) return false; + return Math.abs(parsed - 5) < 0.01; +} + +async function auditSystemdUnit( + env: Record, + issues: ServiceConfigIssue[], +) { + const unitPath = resolveSystemdUserUnitPath(env); + let content = ""; + try { + content = await fs.readFile(unitPath, "utf8"); + } catch { + return; + } + + const parsed = parseSystemdUnit(content); + if (!parsed.after.has("network-online.target")) { + issues.push({ + code: "systemd-after-network-online", + message: "Missing systemd After=network-online.target", + detail: unitPath, + }); + } + if (!parsed.wants.has("network-online.target")) { + issues.push({ + code: "systemd-wants-network-online", + message: "Missing systemd Wants=network-online.target", + detail: unitPath, + }); + } + if (!isRestartSecPreferred(parsed.restartSec)) { + issues.push({ + code: "systemd-restart-sec", + message: "RestartSec does not match the recommended 5s", + detail: unitPath, + }); + } +} + +async function auditLaunchdPlist( + env: Record, + issues: ServiceConfigIssue[], +) { + const plistPath = resolveLaunchAgentPlistPath(env); + let content = ""; + try { + content = await fs.readFile(plistPath, "utf8"); + } catch { + return; + } + + const hasRunAtLoad = /RunAtLoad<\/key>\s*/i.test(content); + const hasKeepAlive = /KeepAlive<\/key>\s*/i.test(content); + if (!hasRunAtLoad) { + issues.push({ + code: "launchd-run-at-load", + message: "LaunchAgent is missing RunAtLoad=true", + detail: plistPath, + }); + } + if (!hasKeepAlive) { + issues.push({ + code: "launchd-keep-alive", + message: "LaunchAgent is missing KeepAlive=true", + detail: plistPath, + }); + } +} + +function auditGatewayCommand( + programArguments: string[] | undefined, + issues: ServiceConfigIssue[], +) { + if (!programArguments || programArguments.length === 0) return; + if (!hasGatewaySubcommand(programArguments)) { + issues.push({ + code: "gateway-command-missing", + message: "Service command does not include the gateway subcommand", + }); + } +} + +export async function auditGatewayServiceConfig(params: { + env: Record; + command: GatewayServiceCommand; + platform?: NodeJS.Platform; +}): Promise { + const issues: ServiceConfigIssue[] = []; + const platform = params.platform ?? process.platform; + + auditGatewayCommand(params.command?.programArguments, issues); + + if (platform === "linux") { + await auditSystemdUnit(params.env, issues); + } else if (platform === "darwin") { + await auditLaunchdPlist(params.env, issues); + } + + return { ok: issues.length === 0, issues }; +} diff --git a/src/daemon/systemd.ts b/src/daemon/systemd.ts index 2b659ec77..f5fdc4829 100644 --- a/src/daemon/systemd.ts +++ b/src/daemon/systemd.ts @@ -33,6 +33,12 @@ function resolveSystemdUnitPath( return resolveSystemdUnitPathForName(env, GATEWAY_SYSTEMD_SERVICE_NAME); } +export function resolveSystemdUserUnitPath( + env: Record, +): string { + return resolveSystemdUnitPath(env); +} + function resolveLoginctlUser( env: Record, ): string | null {