diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index 1d727bea8..dd82b52ab 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -9,7 +9,13 @@ import { CronService } from "./service.js"; import { createDeferred, createRunningCronServiceState } from "./service.test-harness.js"; import { computeJobNextRunAtMs } from "./service/jobs.js"; import { createCronServiceState, type CronEvent } from "./service/state.js"; -import { DEFAULT_JOB_TIMEOUT_MS, executeJobCore, onTimer, runMissedJobs } from "./service/timer.js"; +import { + DEFAULT_JOB_TIMEOUT_MS, + applyJobResult, + executeJobCore, + onTimer, + runMissedJobs, +} from "./service/timer.js"; import type { CronJob, CronJobState } from "./types.js"; const noopLogger = { @@ -1586,4 +1592,81 @@ describe("Cron issue regressions", () => { expect(job?.state.lastStatus).toBe("error"); expect(job?.state.lastError).toContain("timed out"); }); + + it("keeps state updates when cron next-run computation throws after a successful run (#30905)", () => { + const startedAt = Date.parse("2026-03-02T12:00:00.000Z"); + const endedAt = startedAt + 50; + const state = createCronServiceState({ + cronEnabled: true, + storePath: "/tmp/cron-30905-success.json", + log: noopLogger, + nowMs: () => endedAt, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: createDefaultIsolatedRunner(), + }); + const job = createIsolatedRegressionJob({ + id: "apply-result-success-30905", + name: "apply-result-success-30905", + scheduledAt: startedAt, + schedule: { kind: "cron", expr: "0 7 * * *", tz: "Invalid/Timezone" }, + payload: { kind: "agentTurn", message: "ping" }, + state: { nextRunAtMs: startedAt - 1_000, runningAtMs: startedAt - 500 }, + }); + + const shouldDelete = applyJobResult(state, job, { + status: "ok", + delivered: true, + startedAt, + endedAt, + }); + + expect(shouldDelete).toBe(false); + expect(job.state.runningAtMs).toBeUndefined(); + expect(job.state.lastRunAtMs).toBe(startedAt); + expect(job.state.lastStatus).toBe("ok"); + expect(job.state.scheduleErrorCount).toBe(1); + expect(job.state.lastError).toMatch(/^schedule error:/); + expect(job.state.nextRunAtMs).toBe(endedAt + 2_000); + expect(job.enabled).toBe(true); + }); + + it("falls back to backoff schedule when cron next-run computation throws on error path (#30905)", () => { + const startedAt = Date.parse("2026-03-02T12:05:00.000Z"); + const endedAt = startedAt + 25; + const state = createCronServiceState({ + cronEnabled: true, + storePath: "/tmp/cron-30905-error.json", + log: noopLogger, + nowMs: () => endedAt, + enqueueSystemEvent: vi.fn(), + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: createDefaultIsolatedRunner(), + }); + const job = createIsolatedRegressionJob({ + id: "apply-result-error-30905", + name: "apply-result-error-30905", + scheduledAt: startedAt, + schedule: { kind: "cron", expr: "0 7 * * *", tz: "Invalid/Timezone" }, + payload: { kind: "agentTurn", message: "ping" }, + state: { nextRunAtMs: startedAt - 1_000, runningAtMs: startedAt - 500 }, + }); + + const shouldDelete = applyJobResult(state, job, { + status: "error", + error: "synthetic failure", + startedAt, + endedAt, + }); + + expect(shouldDelete).toBe(false); + expect(job.state.runningAtMs).toBeUndefined(); + expect(job.state.lastRunAtMs).toBe(startedAt); + expect(job.state.lastStatus).toBe("error"); + expect(job.state.consecutiveErrors).toBe(1); + expect(job.state.scheduleErrorCount).toBe(1); + expect(job.state.lastError).toMatch(/^schedule error:/); + expect(job.state.nextRunAtMs).toBe(endedAt + 30_000); + expect(job.enabled).toBe(true); + }); }); diff --git a/src/cron/service/jobs.ts b/src/cron/service/jobs.ts index 8c602cd62..ed5f57d78 100644 --- a/src/cron/service/jobs.ts +++ b/src/cron/service/jobs.ts @@ -214,7 +214,7 @@ export function computeJobNextRunAtMs(job: CronJob, nowMs: number): number | und /** Maximum consecutive schedule errors before auto-disabling a job. */ const MAX_SCHEDULE_ERRORS = 3; -function recordScheduleComputeError(params: { +export function recordScheduleComputeError(params: { state: CronServiceState; job: CronJob; err: unknown; diff --git a/src/cron/service/timer.ts b/src/cron/service/timer.ts index 4d38e7c33..9bf9d894e 100644 --- a/src/cron/service/timer.ts +++ b/src/cron/service/timer.ts @@ -15,6 +15,7 @@ import { computeJobNextRunAtMs, nextWakeAtMs, recomputeNextRunsForMaintenance, + recordScheduleComputeError, resolveJobPayloadTextForMain, } from "./jobs.js"; import { locked } from "./locked.js"; @@ -356,7 +357,15 @@ export function applyJobResult( } else if (result.status === "error" && job.enabled) { // Apply exponential backoff for errored jobs to prevent retry storms. const backoff = errorBackoffMs(job.state.consecutiveErrors ?? 1); - const normalNext = computeJobNextRunAtMs(job, result.endedAt); + let normalNext: number | undefined; + try { + normalNext = computeJobNextRunAtMs(job, result.endedAt); + } catch (err) { + // If the schedule expression/timezone throws (croner edge cases), + // record the schedule error (auto-disables after repeated failures) + // and fall back to backoff-only schedule so the state update is not lost. + recordScheduleComputeError({ state, job, err }); + } const backoffNext = result.endedAt + backoff; // Use whichever is later: the natural next run or the backoff delay. job.state.nextRunAtMs = @@ -371,7 +380,15 @@ export function applyJobResult( "cron: applying error backoff", ); } else if (job.enabled) { - const naturalNext = computeJobNextRunAtMs(job, result.endedAt); + let naturalNext: number | undefined; + try { + naturalNext = computeJobNextRunAtMs(job, result.endedAt); + } catch (err) { + // If the schedule expression/timezone throws (croner edge cases), + // record the schedule error (auto-disables after repeated failures) + // so a persistent throw doesn't cause a MIN_REFIRE_GAP_MS hot loop. + recordScheduleComputeError({ state, job, err }); + } if (job.schedule.kind === "cron") { // Safety net: ensure the next fire is at least MIN_REFIRE_GAP_MS // after the current run ended. Prevents spin-loops when the @@ -399,6 +416,10 @@ function applyOutcomeToStoredJob(state: CronServiceState, result: TimedCronRunOu const jobs = store.jobs; const job = jobs.find((entry) => entry.id === result.jobId); if (!job) { + state.deps.log.warn( + { jobId: result.jobId }, + "cron: applyOutcomeToStoredJob — job not found after forceReload, result discarded", + ); return; }