From 73e5bb763530047627d1ff628695aca44295cddf Mon Sep 17 00:00:00 2001 From: Tak Hoffman <781889+Takhoffman@users.noreply.github.com> Date: Sun, 22 Feb 2026 17:04:30 -0600 Subject: [PATCH] Cron: apply timeout to startup catch-up runs (#23966) * Cron: apply timeout to startup catch-up runs * Changelog: add cron startup timeout catch-up note --- CHANGELOG.md | 1 + src/cron/service.issue-regressions.test.ts | 41 +++++++++++++++++++++- src/cron/service/timer.ts | 2 +- 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2aaf763a8..a950f1c76 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,6 +67,7 @@ Docs: https://docs.openclaw.ai - Cron/Delivery: route text-only announce jobs with explicit thread/topic targets through direct outbound delivery so forum/thread destinations do not get dropped by intermediary announce turns. (#23841) Thanks @AndrewArto. - Cron: honor `cron.maxConcurrentRuns` in the timer loop so due jobs can execute up to the configured parallelism instead of always running serially. (#11595) Thanks @Takhoffman. - Cron/Run: enforce the same per-job timeout guard for manual `cron.run` executions as timer-driven runs, including abort propagation for isolated agent jobs, so forced runs cannot wedge indefinitely. (#23704) Thanks @tkuehnl. +- Cron/Startup: enforce per-job timeout guards for startup catch-up replay runs so missed isolated jobs cannot hang indefinitely during gateway boot recovery. - Cron/Schedule: for `every` jobs, prefer `lastRunAtMs + everyMs` when still in the future after restarts, then fall back to anchor scheduling for catch-up windows, so NEXT timing matches the last successful cadence. (#22895) Thanks @SidQin-cyber. - Cron/Service: execute manual `cron.run` jobs outside the cron lock (while still persisting started/finished state atomically) so `cron.list` and `cron.status` remain responsive during long forced runs. (#23628) Thanks @dsgraves. - Cron/Timer: keep a watchdog recheck timer armed while `onTimer` is actively executing so the scheduler continues polling even if a due-run tick stalls for an extended period. (#23628) Thanks @dsgraves. diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index a0838b6f6..c089449ea 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -8,7 +8,7 @@ import { CronService } from "./service.js"; import { createDeferred, createRunningCronServiceState } from "./service.test-harness.js"; import { computeJobNextRunAtMs } from "./service/jobs.js"; import { createCronServiceState, type CronEvent } from "./service/state.js"; -import { onTimer } from "./service/timer.js"; +import { onTimer, runMissedJobs } from "./service/timer.js"; import type { CronJob, CronJobState } from "./types.js"; const noopLogger = { @@ -820,6 +820,45 @@ describe("Cron issue regressions", () => { cron.stop(); }); + it("applies timeoutSeconds to startup catch-up isolated executions", async () => { + vi.useRealTimers(); + const store = await makeStorePath(); + const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z"); + const cronJob = createIsolatedRegressionJob({ + id: "startup-timeout", + name: "startup timeout", + scheduledAt, + schedule: { kind: "at", at: new Date(scheduledAt).toISOString() }, + payload: { kind: "agentTurn", message: "work", timeoutSeconds: 0.01 }, + state: { nextRunAtMs: scheduledAt }, + }); + await writeCronJobs(store.storePath, [cronJob]); + + let now = scheduledAt; + const abortAwareRunner = createAbortAwareIsolatedRunner(); + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn(async (params) => { + const result = await abortAwareRunner.runIsolatedAgentJob(params); + now += 5; + return result; + }), + }); + + await runMissedJobs(state); + + expect(abortAwareRunner.getObservedAbortSignal()).toBeDefined(); + expect(abortAwareRunner.getObservedAbortSignal()?.aborted).toBe(true); + const job = state.store?.jobs.find((entry) => entry.id === "startup-timeout"); + expect(job?.state.lastStatus).toBe("error"); + expect(job?.state.lastError).toContain("timed out"); + }); + it("retries cron schedule computation from the next second when the first attempt returns undefined (#17821)", () => { const scheduledAt = Date.parse("2026-02-15T13:00:00.000Z"); const cronJob = createIsolatedRegressionJob({ diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index cb403762b..36e0ff454 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -547,7 +547,7 @@ export async function runMissedJobs( const startedAt = state.deps.nowMs(); emit(state, { jobId: candidate.job.id, action: "started", runAtMs: startedAt }); try { - const result = await executeJobCore(state, candidate.job); + const result = await executeJobCoreWithTimeout(state, candidate.job); outcomes.push({ jobId: candidate.jobId, status: result.status,