diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index c793979c1..cac5b0bab 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -212,7 +212,7 @@ describe("Cron issue regressions", () => { await store.cleanup(); }); - it("does not hot-loop zero-delay timers while a run is already in progress", async () => { + it("re-arms timer without hot-looping when a run is already in progress", async () => { const timeoutSpy = vi.spyOn(globalThis, "setTimeout"); const store = await makeStorePath(); const now = Date.parse("2026-02-06T10:05:00.000Z"); @@ -233,8 +233,15 @@ describe("Cron issue regressions", () => { await onTimer(state); - expect(timeoutSpy).not.toHaveBeenCalled(); - expect(state.timer).toBeNull(); + // The timer should be re-armed (not null) so the scheduler stays alive, + // with a fixed MAX_TIMER_DELAY_MS (60s) delay to avoid a hot-loop when + // past-due jobs are waiting. See #12025. + expect(timeoutSpy).toHaveBeenCalled(); + expect(state.timer).not.toBeNull(); + const delays = timeoutSpy.mock.calls + .map(([, delay]) => delay) + .filter((d): d is number => typeof d === "number"); + expect(delays).toContain(60_000); timeoutSpy.mockRestore(); await store.cleanup(); }); diff --git a/src/cron/service.rearm-timer-when-running.test.ts b/src/cron/service.rearm-timer-when-running.test.ts new file mode 100644 index 000000000..21b8f2b95 --- /dev/null +++ b/src/cron/service.rearm-timer-when-running.test.ts @@ -0,0 +1,107 @@ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { CronJob } from "./types.js"; +import { createCronServiceState } from "./service/state.js"; +import { onTimer } from "./service/timer.js"; + +const noopLogger = { + debug: vi.fn(), + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), +}; + +async function makeStorePath() { + const dir = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-cron-")); + return { + storePath: path.join(dir, "cron", "jobs.json"), + cleanup: async () => { + await fs.rm(dir, { recursive: true, force: true }); + }, + }; +} + +function createDueRecurringJob(params: { + id: string; + nowMs: number; + nextRunAtMs: number; +}): CronJob { + return { + id: params.id, + name: params.id, + enabled: true, + deleteAfterRun: false, + createdAtMs: params.nowMs, + updatedAtMs: params.nowMs, + schedule: { kind: "every", everyMs: 5 * 60_000 }, + sessionTarget: "isolated", + wakeMode: "next-heartbeat", + payload: { kind: "agentTurn", message: "test" }, + delivery: { mode: "none" }, + state: { nextRunAtMs: params.nextRunAtMs }, + }; +} + +describe("CronService - timer re-arm when running (#12025)", () => { + beforeEach(() => { + noopLogger.debug.mockClear(); + noopLogger.info.mockClear(); + noopLogger.warn.mockClear(); + noopLogger.error.mockClear(); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("re-arms the timer when onTimer is called while state.running is true", async () => { + const timeoutSpy = vi.spyOn(globalThis, "setTimeout"); + const store = await makeStorePath(); + const now = Date.parse("2026-02-06T10:05:00.000Z"); + + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn().mockResolvedValue({ status: "ok", summary: "ok" }), + }); + + // Simulate a job that is currently running. + state.running = true; + state.store = { + version: 1, + jobs: [ + createDueRecurringJob({ + id: "recurring-job", + nowMs: now, + nextRunAtMs: now + 5 * 60_000, + }), + ], + }; + + // Before the fix in #12025, this would return without re-arming, + // silently killing the scheduler. + await onTimer(state); + + // The timer must be re-armed so the scheduler continues ticking, + // with a fixed 60s delay to avoid hot-looping. + expect(state.timer).not.toBeNull(); + expect(timeoutSpy).toHaveBeenCalled(); + const delays = timeoutSpy.mock.calls + .map(([, delay]) => delay) + .filter((d): d is number => typeof d === "number"); + expect(delays).toContain(60_000); + + // state.running should still be true (onTimer bailed out, didn't + // touch it — the original caller's finally block handles that). + expect(state.running).toBe(true); + + timeoutSpy.mockRestore(); + await store.cleanup(); + }); +}); diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index cda67eb2a..3b446848a 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -158,6 +158,26 @@ export function armTimer(state: CronServiceState) { export async function onTimer(state: CronServiceState) { if (state.running) { + // Re-arm the timer so the scheduler keeps ticking even when a job is + // still executing. Without this, a long-running job (e.g. an agentTurn + // exceeding MAX_TIMER_DELAY_MS) causes the clamped 60 s timer to fire + // while `running` is true. The early return then leaves no timer set, + // silently killing the scheduler until the next gateway restart. + // + // We use MAX_TIMER_DELAY_MS as a fixed re-check interval to avoid a + // zero-delay hot-loop when past-due jobs are waiting for the current + // execution to finish. + // See: https://github.com/openclaw/openclaw/issues/12025 + if (state.timer) { + clearTimeout(state.timer); + } + state.timer = setTimeout(async () => { + try { + await onTimer(state); + } catch (err) { + state.deps.log.error({ err: String(err) }, "cron: timer tick failed"); + } + }, MAX_TIMER_DELAY_MS); return; } state.running = true;