From fec4be8dec67019bf058f0d5a1b9f4fb4cd49f1b Mon Sep 17 00:00:00 2001 From: pierreeurope Date: Mon, 16 Feb 2026 14:35:49 +0100 Subject: [PATCH] fix(cron): prevent daily jobs from skipping days (48h jump) #17852 (#17903) Merged via /review-pr -> /prepare-pr -> /merge-pr. Prepared head SHA: 1ffe6a45afac27fd5b0a8c4fd087f7a8fadd1143 Co-authored-by: pierreeurope <248892285+pierreeurope@users.noreply.github.com> Co-authored-by: sebslight <19554889+sebslight@users.noreply.github.com> Reviewed-by: @sebslight --- CHANGELOG.md | 1 + .../service.issue-13992-regression.test.ts | 41 +++++++ .../service.issue-17852-daily-skip.test.ts | 101 ++++++++++++++++++ src/cron/service/jobs.ts | 67 ++++++++---- src/cron/service/timer.ts | 8 +- 5 files changed, 195 insertions(+), 23 deletions(-) create mode 100644 src/cron/service.issue-17852-daily-skip.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b26440171..5b540aa70 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -65,6 +65,7 @@ Docs: https://docs.openclaw.ai - Auto-reply/WhatsApp/TUI/Web: when a final assistant message is `NO_REPLY` and a messaging tool send succeeded, mirror the delivered messaging-tool text into session-visible assistant output so TUI/Web no longer show `NO_REPLY` placeholders. (#7010) Thanks @Morrowind-Xie. - Auto-reply/TTS: keep tool-result media delivery enabled in group chats and native command sessions (while still suppressing tool summary text) so `NO_REPLY` follow-ups do not drop successful TTS audio. (#17991) Thanks @zerone0x. - Cron: infer `payload.kind="agentTurn"` for model-only `cron.update` payload patches, so partial agent-turn updates do not fail validation when `kind` is omitted. (#15664) Thanks @rodrigouroz. +- Cron: preserve per-job schedule-error isolation in post-run maintenance recompute so malformed sibling jobs no longer abort persistence of successful runs. (#17852) Thanks @pierreeurope. - TUI: make searchable-select filtering and highlight rendering ANSI-aware so queries ignore hidden escape codes and no longer corrupt ANSI styling sequences during match highlighting. (#4519) Thanks @bee4come. - TUI/Windows: coalesce rapid single-line submit bursts in Git Bash into one multiline message as a fallback when bracketed paste is unavailable, preventing pasted multiline text from being split into multiple sends. (#4986) Thanks @adamkane. - TUI: suppress false `(no output)` placeholders for non-local empty final events during concurrent runs, preventing external-channel replies from showing empty assistant bubbles while a local run is still streaming. (#5782) Thanks @LagWizard and @vignesh07. diff --git a/src/cron/service.issue-13992-regression.test.ts b/src/cron/service.issue-13992-regression.test.ts index 256060093..5ae776b01 100644 --- a/src/cron/service.issue-13992-regression.test.ts +++ b/src/cron/service.issue-13992-regression.test.ts @@ -127,4 +127,45 @@ describe("issue #13992 regression - cron jobs skip execution", () => { // But should NOT have changed nextRunAtMs (it's still future) expect(job.state.nextRunAtMs).toBe(futureTime); }); + + it("isolates schedule errors while filling missing nextRunAtMs", () => { + const now = Date.now(); + const pastDue = now - 1_000; + + const dueJob: CronJob = { + id: "due-job", + name: "due job", + enabled: true, + schedule: { kind: "cron", expr: "0 8 * * *", tz: "UTC" }, + payload: { kind: "systemEvent", text: "due" }, + sessionTarget: "main", + createdAtMs: now - 3600_000, + updatedAtMs: now - 3600_000, + state: { + nextRunAtMs: pastDue, + }, + }; + + const malformedJob: CronJob = { + id: "bad-job", + name: "bad job", + enabled: true, + schedule: { kind: "cron", expr: "not a valid cron", tz: "UTC" }, + payload: { kind: "systemEvent", text: "bad" }, + sessionTarget: "main", + createdAtMs: now - 3600_000, + updatedAtMs: now - 3600_000, + state: { + // missing nextRunAtMs + }, + }; + + const state = createMockState([dueJob, malformedJob]); + + expect(() => recomputeNextRunsForMaintenance(state)).not.toThrow(); + expect(dueJob.state.nextRunAtMs).toBe(pastDue); + expect(malformedJob.state.nextRunAtMs).toBeUndefined(); + expect(malformedJob.state.scheduleErrorCount).toBe(1); + expect(malformedJob.state.lastError).toMatch(/^schedule error:/); + }); }); diff --git a/src/cron/service.issue-17852-daily-skip.test.ts b/src/cron/service.issue-17852-daily-skip.test.ts new file mode 100644 index 000000000..2b04472a0 --- /dev/null +++ b/src/cron/service.issue-17852-daily-skip.test.ts @@ -0,0 +1,101 @@ +import { describe, expect, it } from "vitest"; +import type { CronServiceState } from "./service/state.js"; +import type { CronJob } from "./types.js"; +import { recomputeNextRuns, recomputeNextRunsForMaintenance } from "./service/jobs.js"; + +/** + * Regression test for issue #17852: daily cron jobs skip a day (48h jump). + * + * Root cause: onTimer's results-processing block used the full + * recomputeNextRuns which could silently advance a past-due nextRunAtMs + * for a job that became due between findDueJobs and the post-execution + * locked block — skipping that run and jumping 48h ahead. + * + * Fix: use recomputeNextRunsForMaintenance in the post-execution block, + * which only fills in missing nextRunAtMs values and never overwrites + * existing (including past-due) ones. + */ +describe("issue #17852 - daily cron jobs should not skip days", () => { + const HOUR_MS = 3_600_000; + const DAY_MS = 24 * HOUR_MS; + + function createMockState(jobs: CronJob[], nowMs: number): CronServiceState { + return { + store: { version: 1, jobs }, + running: false, + timer: null, + storeLoadedAtMs: nowMs, + deps: { + storePath: "/mock/path", + cronEnabled: true, + nowMs: () => nowMs, + log: { + debug: () => {}, + info: () => {}, + warn: () => {}, + error: () => {}, + } as never, + }, + }; + } + + it("recomputeNextRunsForMaintenance should NOT advance past-due nextRunAtMs", () => { + // Simulate: job scheduled for 3:00 AM, timer processing happens at 3:00:01 + // The job was NOT executed in this tick (e.g., it became due between + // findDueJobs and the post-execution block). + const threeAM = Date.parse("2026-02-16T03:00:00.000Z"); + const now = threeAM + 1_000; // 3:00:01 + + const job: CronJob = { + id: "daily-job", + name: "daily 3am", + enabled: true, + schedule: { kind: "cron", expr: "0 3 * * *", tz: "UTC" }, + payload: { kind: "systemEvent", text: "daily task" }, + sessionTarget: "main", + createdAtMs: threeAM - DAY_MS, + updatedAtMs: threeAM - DAY_MS, + state: { + nextRunAtMs: threeAM, // Past-due by 1 second + }, + }; + + const state = createMockState([job], now); + recomputeNextRunsForMaintenance(state); + + // Maintenance should NOT touch existing past-due nextRunAtMs. + // The job should still be eligible for execution on the next timer tick. + expect(job.state.nextRunAtMs).toBe(threeAM); + }); + + it("full recomputeNextRuns WOULD silently advance past-due nextRunAtMs (the bug)", () => { + // This test documents the buggy behavior that caused #17852. + // The full recomputeNextRuns sees a past-due nextRunAtMs and advances it + // to the next occurrence WITHOUT executing the job. + const threeAM = Date.parse("2026-02-16T03:00:00.000Z"); + const now = threeAM + 1_000; // 3:00:01 + + const job: CronJob = { + id: "daily-job", + name: "daily 3am", + enabled: true, + schedule: { kind: "cron", expr: "0 3 * * *", tz: "UTC" }, + payload: { kind: "systemEvent", text: "daily task" }, + sessionTarget: "main", + createdAtMs: threeAM - DAY_MS, + updatedAtMs: threeAM - DAY_MS, + state: { + nextRunAtMs: threeAM, // Past-due by 1 second + }, + }; + + const state = createMockState([job], now); + recomputeNextRuns(state); + + // The full recomputeNextRuns advances it to TOMORROW — skipping today's + // execution entirely. This is the 48h jump bug: from the previous run + // (yesterday 3 AM) to the newly computed next run (tomorrow 3 AM). + const tomorrowThreeAM = threeAM + DAY_MS; + expect(job.state.nextRunAtMs).toBe(tomorrowThreeAM); + }); +}); diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 4185987f5..f5e96e2b4 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -102,6 +102,35 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und /** Maximum consecutive schedule errors before auto-disabling a job. */ const MAX_SCHEDULE_ERRORS = 3; +function recordScheduleComputeError(params: { + state: CronServiceState; + job: CronJob; + err: unknown; +}): boolean { + const { state, job, err } = params; + const errorCount = (job.state.scheduleErrorCount ?? 0) + 1; + const errText = String(err); + + job.state.scheduleErrorCount = errorCount; + job.state.nextRunAtMs = undefined; + job.state.lastError = `schedule error: ${errText}`; + + if (errorCount >= MAX_SCHEDULE_ERRORS) { + job.enabled = false; + state.deps.log.error( + { jobId: job.id, name: job.name, errorCount, err: errText }, + "cron: auto-disabled job after repeated schedule errors", + ); + } else { + state.deps.log.warn( + { jobId: job.id, name: job.name, errorCount, err: errText }, + "cron: failed to compute next run for job (skipping)", + ); + } + + return true; +} + function normalizeJobTickState(params: { state: CronServiceState; job: CronJob; nowMs: number }): { changed: boolean; skip: boolean; @@ -184,23 +213,8 @@ export function recomputeNextRuns(state: CronServiceState): boolean { changed = true; } } catch (err) { - const errorCount = (job.state.scheduleErrorCount ?? 0) + 1; - job.state.scheduleErrorCount = errorCount; - job.state.nextRunAtMs = undefined; - job.state.lastError = `schedule error: ${String(err)}`; - changed = true; - - if (errorCount >= MAX_SCHEDULE_ERRORS) { - job.enabled = false; - state.deps.log.error( - { jobId: job.id, name: job.name, errorCount, err: String(err) }, - "cron: auto-disabled job after repeated schedule errors", - ); - } else { - state.deps.log.warn( - { jobId: job.id, name: job.name, errorCount, err: String(err) }, - "cron: failed to compute next run for job (skipping)", - ); + if (recordScheduleComputeError({ state, job, err })) { + changed = true; } } } @@ -222,10 +236,21 @@ export function recomputeNextRunsForMaintenance(state: CronServiceState): boolea // If a job was past-due but not found by findDueJobs, recomputing would // cause it to be silently skipped. if (job.state.nextRunAtMs === undefined) { - const newNext = computeJobNextRunAtMs(job, now); - if (newNext !== undefined) { - job.state.nextRunAtMs = newNext; - changed = true; + try { + const newNext = computeJobNextRunAtMs(job, now); + if (job.state.nextRunAtMs !== newNext) { + job.state.nextRunAtMs = newNext; + changed = true; + } + // Clear schedule error count on successful computation. + if (job.state.scheduleErrorCount) { + job.state.scheduleErrorCount = undefined; + changed = true; + } + } catch (err) { + if (recordScheduleComputeError({ state, job, err })) { + changed = true; + } } } return changed; diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 674f191a8..1d26401af 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -7,7 +7,6 @@ import { sweepCronRunSessions } from "../session-reaper.js"; import { computeJobNextRunAtMs, nextWakeAtMs, - recomputeNextRuns, recomputeNextRunsForMaintenance, resolveJobPayloadTextForMain, } from "./jobs.js"; @@ -283,7 +282,12 @@ export async function onTimer(state: CronServiceState) { } } - recomputeNextRuns(state); + // Use maintenance-only recompute to avoid advancing past-due + // nextRunAtMs values that became due between findDueJobs and this + // locked block. The full recomputeNextRuns would silently skip + // those jobs (advancing nextRunAtMs without execution), causing + // daily cron schedules to jump 48 h instead of 24 h (#17852). + recomputeNextRunsForMaintenance(state); await persist(state); }); }