* exec: clean up PTY resources on timeout and exit * cli: harden resume cleanup and watchdog stalled runs * cli: productionize PTY and resume reliability paths * docs: add PTY process supervision architecture plan * docs: rewrite PTY supervision plan as pre-rewrite baseline * docs: switch PTY supervision plan to one-go execution * docs: add one-line root cause to PTY supervision plan * docs: add OS contracts and test matrix to PTY supervision plan * docs: define process-supervisor package placement and scope * docs: tie supervisor plan to existing CI lanes * docs: place PTY supervisor plan under src/process * refactor(process): route exec and cli runs through supervisor * docs(process): refresh PTY supervision plan * wip * fix(process): harden supervisor timeout and PTY termination * fix(process): harden supervisor adapters env and wait handling * ci: avoid failing formal conformance on comment permissions * test(ui): fix cron request mock argument typing * fix(ui): remove leftover conflict marker * fix: supervise PTY processes (#14257) (openclaw#14257) (thanks @onutc)
222 lines
5.8 KiB
TypeScript
222 lines
5.8 KiB
TypeScript
import fs from "node:fs/promises";
|
|
import os from "node:os";
|
|
import path from "node:path";
|
|
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
import type { OpenClawConfig } from "../config/config.js";
|
|
import { runCliAgent } from "./cli-runner.js";
|
|
import { resolveCliNoOutputTimeoutMs } from "./cli-runner/helpers.js";
|
|
|
|
/** Shared spy standing in for the supervisor's `spawn`; each test queues its own result on it. */
const supervisorSpawnMock = vi.fn();

// Replace the process supervisor module with a stub. `spawn` forwards to the
// shared spy lazily (inside the arrow function) rather than capturing it when
// the factory runs, so the factory never reads `supervisorSpawnMock` before
// the `const` above has been initialized.
vi.mock("../process/supervisor/index.js", () => {
  // Every call hands back a fresh object, so the non-spawn stubs are new
  // `vi.fn()` instances each time.
  const getProcessSupervisor = () => {
    return {
      spawn: (...spawnArgs: unknown[]) => supervisorSpawnMock(...spawnArgs),
      cancel: vi.fn(),
      cancelScope: vi.fn(),
      reconcileOrphans: vi.fn(),
      getRecord: vi.fn(),
    };
  };

  return { getProcessSupervisor };
});
|
|
|
|
/**
 * Exit record that the mocked run's `wait()` resolves with in these tests.
 * NOTE(review): presumably mirrors the supervisor's real exit type — confirm
 * against the supervisor module when that type changes.
 */
type MockRunExit = {
  /** Label for why the run ended; the timeout tests below use the two `*-timeout` values. */
  reason:
    | "manual-cancel"
    | "overall-timeout"
    | "no-output-timeout"
    | "spawn-error"
    | "signal"
    | "exit";
  /** Process exit code, or `null` (the killed-run fixtures in this file use `null`). */
  exitCode: number | null;
  /** Terminating signal (name or number), or `null` when there was none. */
  exitSignal: NodeJS.Signals | number | null;
  /** Run duration in milliseconds. */
  durationMs: number;
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error. */
  stderr: string;
  /** Set to `true` by both timeout fixtures in this file. */
  timedOut: boolean;
  /** Set to `true` only by the no-output-timeout fixture in this file. */
  noOutputTimedOut: boolean;
};
|
|
|
|
/**
 * Builds the fake managed-run handle that the mocked supervisor `spawn`
 * resolves with.
 *
 * @param exit - Value the handle's `wait()` stub resolves to.
 * @param pid - Process id reported on the handle; defaults to 1234.
 * @returns A handle with a fixed run id, the current timestamp, no stdin, and
 *   stubbed `wait`/`cancel` functions.
 */
function createManagedRun(exit: MockRunExit, pid = 1234) {
  const startedAtMs = Date.now();
  const wait = vi.fn().mockResolvedValue(exit);
  const cancel = vi.fn();

  return {
    runId: "run-supervisor",
    pid,
    startedAtMs,
    stdin: undefined,
    wait,
    cancel,
  };
}
|
|
|
|
// Exercises `runCliAgent` against the mocked process supervisor: the happy
// path, both timeout failure modes, and the workspace fallback.
describe("runCliAgent with process supervisor", () => {
  /** Arguments that every test passes to `runCliAgent` unchanged. */
  const sharedParams = {
    sessionId: "s1",
    sessionFile: "/tmp/session.jsonl",
    prompt: "hi",
    provider: "codex-cli",
    model: "gpt-5.2-codex",
    timeoutMs: 1_000,
  } as const;

  /** Shared arguments plus an explicit workspace and an existing CLI session id. */
  const resumeParams = {
    ...sharedParams,
    workspaceDir: "/tmp",
    cliSessionId: "thread-123",
  };

  /** Queues one managed run (resolving to `exit`) as the next `spawn` result. */
  const queueRun = (exit: MockRunExit): void => {
    supervisorSpawnMock.mockResolvedValueOnce(createManagedRun(exit));
  };

  /** Exit fixture for a run that finished cleanly and printed "ok". */
  const cleanExit = (durationMs: number): MockRunExit => ({
    reason: "exit",
    exitCode: 0,
    exitSignal: null,
    durationMs,
    stdout: "ok",
    stderr: "",
    timedOut: false,
    noOutputTimedOut: false,
  });

  /** Exit fixture for a run that was SIGKILLed by one of the two timeouts. */
  const killedExit = (reason: "no-output-timeout" | "overall-timeout"): MockRunExit => ({
    reason,
    exitCode: null,
    exitSignal: "SIGKILL",
    durationMs: 200,
    stdout: "",
    stderr: "",
    timedOut: true,
    noOutputTimedOut: reason === "no-output-timeout",
  });

  beforeEach(() => {
    // Drop queued results and recorded calls left over from the previous test.
    supervisorSpawnMock.mockReset();
  });

  it("runs CLI through supervisor and returns payload", async () => {
    queueRun(cleanExit(50));

    const outcome = await runCliAgent({ ...resumeParams, runId: "run-1" });

    expect(outcome.payloads?.[0]?.text).toBe("ok");
    expect(supervisorSpawnMock).toHaveBeenCalledTimes(1);

    // Inspect the single spawn request the runner handed to the supervisor.
    const spawnInput = supervisorSpawnMock.mock.calls[0]?.[0] as {
      argv?: string[];
      mode?: string;
      timeoutMs?: number;
      noOutputTimeoutMs?: number;
      replaceExistingScope?: boolean;
      scopeKey?: string;
    };
    expect(spawnInput.mode).toBe("child");
    expect(spawnInput.argv?.[0]).toBe("codex");
    expect(spawnInput.timeoutMs).toBe(1_000);
    expect(spawnInput.noOutputTimeoutMs).toBeGreaterThanOrEqual(1_000);
    expect(spawnInput.replaceExistingScope).toBe(true);
    expect(spawnInput.scopeKey).toContain("thread-123");
  });

  it("fails with timeout when no-output watchdog trips", async () => {
    queueRun(killedExit("no-output-timeout"));

    const pendingRun = runCliAgent({ ...resumeParams, runId: "run-2" });

    await expect(pendingRun).rejects.toThrow("produced no output");
  });

  it("fails with timeout when overall timeout trips", async () => {
    queueRun(killedExit("overall-timeout"));

    const pendingRun = runCliAgent({ ...resumeParams, runId: "run-3" });

    await expect(pendingRun).rejects.toThrow("exceeded timeout");
  });

  it("falls back to per-agent workspace when workspaceDir is missing", async () => {
    const scratchRoot = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cli-runner-"));
    const fallbackWorkspace = path.join(scratchRoot, "workspace-main");
    await fs.mkdir(fallbackWorkspace, { recursive: true });
    const cfg = {
      agents: {
        defaults: {
          workspace: fallbackWorkspace,
        },
      },
    } satisfies OpenClawConfig;

    queueRun(cleanExit(25));

    try {
      await runCliAgent({
        ...sharedParams,
        sessionKey: "agent:main:subagent:missing-workspace",
        // Deliberately bypass the type to simulate a caller that omitted the workspace.
        workspaceDir: undefined as unknown as string,
        config: cfg,
        runId: "run-4",
      });
    } finally {
      // Remove the scratch directory even when the run rejects.
      await fs.rm(scratchRoot, { recursive: true, force: true });
    }

    const spawnInput = supervisorSpawnMock.mock.calls[0]?.[0] as { cwd?: string };
    expect(spawnInput.cwd).toBe(path.resolve(fallbackWorkspace));
  });
});
|
|
|
|
// Checks that a backend-level resume watchdog setting is what
// `resolveCliNoOutputTimeoutMs` returns for a resumed run.
describe("resolveCliNoOutputTimeoutMs", () => {
  it("uses backend-configured resume watchdog override", () => {
    const resumeOverrideMs = 42_000;

    const resolved = resolveCliNoOutputTimeoutMs({
      backend: {
        command: "codex",
        reliability: {
          watchdog: {
            resume: { noOutputTimeoutMs: resumeOverrideMs },
          },
        },
      },
      timeoutMs: 120_000,
      useResume: true,
    });

    // The configured override is returned as-is rather than the overall timeout.
    expect(resolved).toBe(resumeOverrideMs);
  });
});
|