From be8930d6f9eb00ad124a67bc9219de4827ca836b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yasunori=20Morishima=EF=BC=88=E7=9B=9B=E5=B3=B6=E5=BA=B7?= =?UTF-8?q?=E5=BE=B3=EF=BC=89?= Date: Mon, 2 Mar 2026 22:47:36 +0900 Subject: [PATCH] fix: clear stale runningAtMs in cron.run() before already-running check (#17949) Add recomputeNextRunsForMaintenance() call in run() so that stale runningAtMs markers (from a crashed Phase-1 persist) are cleared by the existing normalizeJobTickState logic before the already-running guard. Without this, a manual cron.run() could be blocked for up to STUCK_RUN_MS (2 hours) even though no job was actually running. Fixes #17554 Co-authored-by: Claude Opus 4.6 --- src/cron/service.issue-regressions.test.ts | 56 ++++++++++++++++++++++ src/cron/service/ops.ts | 4 ++ 2 files changed, 60 insertions(+) diff --git a/src/cron/service.issue-regressions.test.ts b/src/cron/service.issue-regressions.test.ts index dd82b52ab..ba32e3fdf 100644 --- a/src/cron/service.issue-regressions.test.ts +++ b/src/cron/service.issue-regressions.test.ts @@ -8,6 +8,7 @@ import * as schedule from "./schedule.js"; import { CronService } from "./service.js"; import { createDeferred, createRunningCronServiceState } from "./service.test-harness.js"; import { computeJobNextRunAtMs } from "./service/jobs.js"; +import { run } from "./service/ops.js"; import { createCronServiceState, type CronEvent } from "./service/state.js"; import { DEFAULT_JOB_TIMEOUT_MS, @@ -1450,6 +1451,61 @@ describe("Cron issue regressions", () => { expect(startedAtEvents).toEqual([dueAt, dueAt + 50]); }); + it("#17554: run() clears stale runningAtMs and executes the job", async () => { + const store = await makeStorePath(); + const now = Date.parse("2026-02-06T10:05:00.000Z"); + const staleRunningAtMs = now - 2 * 60 * 60 * 1000 - 1; + + await fs.writeFile( + store.storePath, + JSON.stringify( + { + version: 1, + jobs: [ + { + id: "stale-running", + name: "stale-running", + enabled: true, + createdAtMs: now - 3_600_000, + updatedAtMs: now - 3_600_000, + schedule: { kind: "at", at: new Date(now - 60_000).toISOString() }, + sessionTarget: "main", + wakeMode: "now", + payload: { kind: "systemEvent", text: "stale-running" }, + state: { + runningAtMs: staleRunningAtMs, + lastRunAtMs: now - 3_600_000, + lastStatus: "ok", + nextRunAtMs: now - 60_000, + }, + }, + ], + }, + null, + 2, + ), + "utf-8", + ); + + const enqueueSystemEvent = vi.fn(); + const state = createCronServiceState({ + cronEnabled: true, + storePath: store.storePath, + log: noopLogger, + nowMs: () => now, + enqueueSystemEvent, + requestHeartbeatNow: vi.fn(), + runIsolatedAgentJob: vi.fn().mockResolvedValue({ status: "ok", summary: "ok" }), + }); + + const result = await run(state, "stale-running", "force"); + expect(result).toEqual({ ok: true, ran: true }); + expect(enqueueSystemEvent).toHaveBeenCalledWith( + "stale-running", + expect.objectContaining({ agentId: undefined }), + ); + }); + it("honors cron maxConcurrentRuns for due jobs", async () => { vi.useRealTimers(); const store = await makeStorePath(); diff --git a/src/cron/service/ops.ts b/src/cron/service/ops.ts index 2b7ebf57f..dd02ca4ab 100644 --- a/src/cron/service/ops.ts +++ b/src/cron/service/ops.ts @@ -341,6 +341,10 @@ export async function run(state: CronServiceState, id: string, mode?: "due" | "f const prepared = await locked(state, async () => { warnIfDisabled(state, "run"); await ensureLoaded(state, { skipRecompute: true }); + // Normalize job tick state (clears stale runningAtMs markers) before + // checking if already running, so a stale marker from a crashed Phase-1 + // persist does not block manual triggers for up to STUCK_RUN_MS (#17554). + recomputeNextRunsForMaintenance(state); const job = findJobOrThrow(state, id); if (typeof job.state.runningAtMs === "number") { return { ok: true, ran: false, reason: "already-running" as const };