From c3bb723673f4998269618f5cc05b47bedff413a2 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sun, 22 Feb 2026 19:51:54 +0100 Subject: [PATCH] fix(cron): enforce timeout for manual cron runs --- CHANGELOG.md | 1 + src/cron/service.issue-regressions.test.ts | 48 ++++++++++++++++++++++ src/cron/service/ops.ts | 35 +++++++++++++++- 3 files changed, 83 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 11ff315cd..f9d000a42 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ Docs: https://docs.openclaw.ai - Cron/Isolation: force fresh session IDs for isolated cron runs so `sessionTarget="isolated"` executions never reuse prior run context. (#23470) Thanks @echoVic. - Cron/Service: execute manual `cron.run` jobs outside the cron lock (while still persisting started/finished state atomically) so `cron.list` and `cron.status` remain responsive during long forced runs. (#23628) Thanks @dsgraves. - Cron/Timer: keep a watchdog recheck timer armed while `onTimer` is actively executing so the scheduler continues polling even if a due-run tick stalls for an extended period. (#23628) Thanks @dsgraves. +- Cron/Run: enforce the same per-job timeout guard for manual `cron.run` executions as timer-driven runs, including abort propagation for isolated agent jobs, so forced runs cannot wedge indefinitely. (#23704) Thanks @tkuehnl. - Agents/Compaction: restore embedded compaction safeguard/context-pruning extension loading in production by wiring bundled extension factories into the resource loader instead of runtime file-path resolution. (#22349) Thanks @Glucksberg. - Feishu/Media: for inbound video messages that include both `file_key` (video) and `image_key` (thumbnail), prefer `file_key` when downloading media so video attachments are saved instead of silently failing on thumbnail keys. 
(#23633) - Hooks/Cron: suppress duplicate main-session events for delivered hook turns and mark `SILENT_REPLY_TOKEN` (`NO_REPLY`) early exits as delivered to prevent hook context pollution. (#20678) Thanks @JonathanWorks. diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index 132fe18f8..1ea407a11 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -732,6 +732,54 @@ describe("Cron issue regressions", () => { expect(job?.state.lastError).toContain("timed out"); }); + it("applies timeoutSeconds to manual cron.run isolated executions", async () => { + vi.useRealTimers(); + const store = await makeStorePath(); + let observedAbortSignal: AbortSignal | undefined; + + const cron = await startCronForStore({ + storePath: store.storePath, + runIsolatedAgentJob: vi.fn(async ({ abortSignal }) => { + observedAbortSignal = abortSignal; + await new Promise<void>((resolve) => { + if (!abortSignal) { + return; + } + if (abortSignal.aborted) { + resolve(); + return; + } + abortSignal.addEventListener("abort", () => resolve(), { once: true }); + }); + return { status: "ok" as const, summary: "late" }; + }), + }); + + const job = await cron.add({ + name: "manual timeout", + enabled: true, + schedule: { kind: "every", everyMs: 60_000, anchorMs: Date.now() }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "work", timeoutSeconds: 0.01 }, + delivery: { mode: "none" }, + }); + + const result = await cron.run(job.id, "force"); + expect(result).toEqual({ ok: true, ran: true }); + expect(observedAbortSignal).toBeDefined(); + expect(observedAbortSignal?.aborted).toBe(true); + + const updated = (await cron.list({ includeDisabled: true })).find( + (entry) => entry.id === job.id, + ); + expect(updated?.state.lastStatus).toBe("error"); + expect(updated?.state.lastError).toContain("timed out"); + expect(updated?.state.runningAtMs).toBeUndefined(); 
+ + cron.stop(); + }); + it("retries cron schedule computation from the next second when the first attempt returns undefined (#17821)", () => { const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z"); const cronJob = createIsolatedRegressionJob({ diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 0a60b88f5..bea2af86c 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -13,6 +13,7 @@ import { locked } from "./locked.js"; import type { CronServiceState } from "./state.js"; import { ensureLoaded, persist, warnIfDisabled } from "./store.js"; import { + DEFAULT_JOB_TIMEOUT_MS, applyJobResult, armTimer, emit, @@ -247,8 +248,40 @@ export async function run(state: CronServiceState, id: string, mode?: "due" | "f status: "error"; error: string; }; + const configuredTimeoutMs = + prepared.executionJob.payload.kind === "agentTurn" && + typeof prepared.executionJob.payload.timeoutSeconds === "number" + ? Math.floor(prepared.executionJob.payload.timeoutSeconds * 1_000) + : undefined; + const jobTimeoutMs = + configuredTimeoutMs !== undefined + ? configuredTimeoutMs <= 0 + ? undefined + : configuredTimeoutMs + : DEFAULT_JOB_TIMEOUT_MS; try { - coreResult = await executeJobCore(state, prepared.executionJob); + const runAbortController = typeof jobTimeoutMs === "number" ? new AbortController() : undefined; + coreResult = + typeof jobTimeoutMs === "number" + ? 
await (async () => { + let timeoutId: NodeJS.Timeout | undefined; + try { + return await Promise.race([ + executeJobCore(state, prepared.executionJob, runAbortController?.signal), + new Promise<never>((_, reject) => { + timeoutId = setTimeout(() => { + runAbortController?.abort(new Error("cron: job execution timed out")); + reject(new Error("cron: job execution timed out")); + }, jobTimeoutMs); + }), + ]); + } finally { + if (timeoutId) { + clearTimeout(timeoutId); + } + } + })() + : await executeJobCore(state, prepared.executionJob); } catch (err) { coreResult = { status: "error", error: String(err) }; }