Files
Moltbot/src/agents/cli-runner/reliability.ts
Onur cd44a0d01e fix: codex and similar processes keep dying on pty, solved by refactoring process spawning (#14257)
* exec: clean up PTY resources on timeout and exit

* cli: harden resume cleanup and watchdog stalled runs

* cli: productionize PTY and resume reliability paths

* docs: add PTY process supervision architecture plan

* docs: rewrite PTY supervision plan as pre-rewrite baseline

* docs: switch PTY supervision plan to one-go execution

* docs: add one-line root cause to PTY supervision plan

* docs: add OS contracts and test matrix to PTY supervision plan

* docs: define process-supervisor package placement and scope

* docs: tie supervisor plan to existing CI lanes

* docs: place PTY supervisor plan under src/process

* refactor(process): route exec and cli runs through supervisor

* docs(process): refresh PTY supervision plan

* wip

* fix(process): harden supervisor timeout and PTY termination

* fix(process): harden supervisor adapters env and wait handling

* ci: avoid failing formal conformance on comment permissions

* test(ui): fix cron request mock argument typing

* fix(ui): remove leftover conflict marker

* fix: supervise PTY processes (#14257) (openclaw#14257) (thanks @onutc)
2026-02-16 02:32:05 +01:00

89 lines
2.8 KiB
TypeScript

import path from "node:path";
import type { CliBackendConfig } from "../../config/types.js";
import {
CLI_FRESH_WATCHDOG_DEFAULTS,
CLI_RESUME_WATCHDOG_DEFAULTS,
CLI_WATCHDOG_MIN_TIMEOUT_MS,
} from "../cli-watchdog-defaults.js";
/**
 * Builds the effective no-output watchdog profile for a CLI run.
 *
 * Overrides are read from `backend.reliability.watchdog.resume` when
 * `useResume` is true and from `backend.reliability.watchdog.fresh` otherwise.
 * Any override that is not a finite number is ignored in favour of the matching
 * entry in the resume/fresh defaults.
 *
 * @param backend - Backend configuration that may carry watchdog overrides.
 * @param useResume - Selects the resume profile instead of the fresh one.
 * @returns The resolved profile. `noOutputTimeoutMs` is `undefined` unless an
 *   explicit finite override was configured; `minMs` never exceeds `maxMs`.
 */
function pickWatchdogProfile(
  backend: CliBackendConfig,
  useResume: boolean,
): {
  noOutputTimeoutMs?: number;
  noOutputTimeoutRatio: number;
  minMs: number;
  maxMs: number;
} {
  const fallback = useResume ? CLI_RESUME_WATCHDOG_DEFAULTS : CLI_FRESH_WATCHDOG_DEFAULTS;
  const watchdog = backend.reliability?.watchdog;
  const overrides = useResume ? watchdog?.resume : watchdog?.fresh;

  // Only finite numbers count as real overrides (rejects NaN, ±Infinity, non-numbers).
  const isUsableNumber = (candidate: unknown): candidate is number =>
    typeof candidate === "number" && Number.isFinite(candidate);
  // Configured millisecond values are truncated to integers and raised to the global minimum.
  const toBoundedMs = (ms: number): number =>
    Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, Math.floor(ms));

  const rawRatio = overrides?.noOutputTimeoutRatio;
  const ratio = isUsableNumber(rawRatio)
    ? Math.max(0.05, Math.min(0.95, rawRatio)) // clamp the ratio into [0.05, 0.95]
    : fallback.noOutputTimeoutRatio;

  const rawLower = overrides?.minMs;
  const lower = isUsableNumber(rawLower) ? toBoundedMs(rawLower) : fallback.minMs;

  const rawUpper = overrides?.maxMs;
  const upper = isUsableNumber(rawUpper) ? toBoundedMs(rawUpper) : fallback.maxMs;

  const rawFixed = overrides?.noOutputTimeoutMs;
  const fixedTimeout = isUsableNumber(rawFixed) ? toBoundedMs(rawFixed) : undefined;

  return {
    noOutputTimeoutMs: fixedTimeout,
    noOutputTimeoutRatio: ratio,
    // Swap the bounds if they were supplied in inverted order.
    minMs: Math.min(lower, upper),
    maxMs: Math.max(lower, upper),
  };
}
/**
 * Computes how long a CLI run may stay silent before the watchdog fires.
 *
 * An explicitly configured `noOutputTimeoutMs` is used as the candidate when
 * the resolved profile has one; otherwise the candidate is
 * `timeoutMs * noOutputTimeoutRatio` (floored) bounded to `[minMs, maxMs]`.
 * Either way the result is limited to one second below the overall timeout,
 * and that limit itself never drops under `CLI_WATCHDOG_MIN_TIMEOUT_MS`.
 *
 * @param params.backend - Backend configuration passed to the profile lookup.
 * @param params.timeoutMs - Overall timeout of the run, in milliseconds.
 * @param params.useResume - Whether the resume profile applies to this run.
 * @returns The no-output timeout in milliseconds.
 */
export function resolveCliNoOutputTimeoutMs(params: {
  backend: CliBackendConfig;
  timeoutMs: number;
  useResume: boolean;
}): number {
  const { backend, timeoutMs, useResume } = params;
  const { noOutputTimeoutMs, noOutputTimeoutRatio, minMs, maxMs } = pickWatchdogProfile(
    backend,
    useResume,
  );
  // Keep watchdog below global timeout in normal cases.
  const ceiling = Math.max(CLI_WATCHDOG_MIN_TIMEOUT_MS, timeoutMs - 1_000);

  let candidate: number;
  if (noOutputTimeoutMs === undefined) {
    // No fixed override: derive from the overall timeout, then bound to [minMs, maxMs].
    const scaled = Math.floor(timeoutMs * noOutputTimeoutRatio);
    candidate = Math.min(maxMs, Math.max(minMs, scaled));
  } else {
    candidate = noOutputTimeoutMs;
  }
  return Math.min(candidate, ceiling);
}
/**
 * Builds the scope key string for a CLI session, in the form
 * `cli:<backendId>:<command basename>:<sessionId>`.
 *
 * The backend id and the basename of `backend.command` are trimmed and
 * lower-cased; the session id is trimmed but keeps its case. A missing
 * `backend.command` contributes an empty command segment.
 *
 * @param params.backend - Backend configuration; only `command` is read.
 * @param params.backendId - Identifier of the backend.
 * @param params.cliSessionId - Optional CLI session identifier.
 * @returns The scope key, or `undefined` when the session id is absent or
 *   blank after trimming.
 */
export function buildCliSupervisorScopeKey(params: {
  backend: CliBackendConfig;
  backendId: string;
  cliSessionId?: string;
}): string | undefined {
  const { backend, backendId, cliSessionId } = params;
  const executableName = path.basename(backend.command ?? "");
  const commandPart = executableName.trim().toLowerCase();
  const backendPart = backendId.trim().toLowerCase();
  const sessionPart = cliSessionId?.trim();
  // Without a usable session id there is nothing to scope on.
  return sessionPart ? ["cli", backendPart, commandPart, sessionPart].join(":") : undefined;
}