fix: clear stale runningAtMs in cron.run() before already-running check (#17949)
Add a recomputeNextRunsForMaintenance() call in run() so that stale runningAtMs markers (from a crashed Phase-1 persist) are cleared by the existing normalizeJobTickState logic before the already-running guard. Without this, a manual cron.run() could be blocked for up to STUCK_RUN_MS (2 hours) even though no job was actually running.

Fixes #17554.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
committed by
GitHub
parent
60b8d645de
commit
be8930d6f9
@@ -8,6 +8,7 @@ import * as schedule from "./schedule.js";
|
||||
import { CronService } from "./service.js";
|
||||
import { createDeferred, createRunningCronServiceState } from "./service.test-harness.js";
|
||||
import { computeJobNextRunAtMs } from "./service/jobs.js";
|
||||
import { run } from "./service/ops.js";
|
||||
import { createCronServiceState, type CronEvent } from "./service/state.js";
|
||||
import {
|
||||
DEFAULT_JOB_TIMEOUT_MS,
|
||||
@@ -1450,6 +1451,61 @@ describe("Cron issue regressions", () => {
|
||||
expect(startedAtEvents).toEqual([dueAt, dueAt + 50]);
|
||||
});
|
||||
|
||||
// Regression test for #17554: a job persisted with a stale `runningAtMs`
// marker must still execute when triggered manually through run().
// The test seeds the store file directly, builds a service state with a
// pinned clock, calls run() in "force" mode, and checks that the job ran.
it("#17554: run() clears stale runningAtMs and executes the job", async () => {
|
||||
const store = await makeStorePath();
|
||||
// Fixed "current time"; it is also returned by the injected nowMs() below.
const now = Date.parse("2026-02-06T10:05:00.000Z");
|
||||
// Two hours plus one millisecond before `now`.
// NOTE(review): presumably chosen to sit just past the stuck-run threshold
// (the commit description gives STUCK_RUN_MS as 2 hours) — confirm against
// the constant's definition.
const staleRunningAtMs = now - 2 * 60 * 60 * 1000 - 1;
|
||||
|
||||
// Write the store file by hand so the job already carries runningAtMs when
// the service state first reads it.
await fs.writeFile(
|
||||
store.storePath,
|
||||
JSON.stringify(
|
||||
{
|
||||
version: 1,
|
||||
jobs: [
|
||||
{
|
||||
id: "stale-running",
|
||||
name: "stale-running",
|
||||
enabled: true,
|
||||
createdAtMs: now - 3_600_000,
|
||||
updatedAtMs: now - 3_600_000,
|
||||
// One-shot "at" schedule dated one minute before `now`.
schedule: { kind: "at", at: new Date(now - 60_000).toISOString() },
|
||||
sessionTarget: "main",
|
||||
wakeMode: "now",
|
||||
payload: { kind: "systemEvent", text: "stale-running" },
|
||||
state: {
|
||||
// The stale marker under test.
runningAtMs: staleRunningAtMs,
|
||||
lastRunAtMs: now - 3_600_000,
|
||||
lastStatus: "ok",
|
||||
nextRunAtMs: now - 60_000,
|
||||
},
|
||||
},
|
||||
],
|
||||
},
|
||||
null,
|
||||
2,
|
||||
),
|
||||
"utf-8",
|
||||
);
|
||||
|
||||
// Mock used to observe that the job's system-event payload was delivered.
const enqueueSystemEvent = vi.fn();
|
||||
const state = createCronServiceState({
|
||||
cronEnabled: true,
|
||||
storePath: store.storePath,
|
||||
log: noopLogger,
|
||||
// Pin the clock so the age of runningAtMs is deterministic.
nowMs: () => now,
|
||||
enqueueSystemEvent,
|
||||
requestHeartbeatNow: vi.fn(),
|
||||
runIsolatedAgentJob: vi.fn().mockResolvedValue({ status: "ok", summary: "ok" }),
|
||||
});
|
||||
|
||||
// Manual trigger in "force" mode against the job seeded above.
const result = await run(state, "stale-running", "force");
|
||||
// The run must report that it executed rather than being skipped.
expect(result).toEqual({ ok: true, ran: true });
|
||||
// The payload text must have been enqueued, with an options object matching
// `agentId: undefined`.
expect(enqueueSystemEvent).toHaveBeenCalledWith(
|
||||
"stale-running",
|
||||
expect.objectContaining({ agentId: undefined }),
|
||||
);
|
||||
});
|
||||
|
||||
it("honors cron maxConcurrentRuns for due jobs", async () => {
|
||||
vi.useRealTimers();
|
||||
const store = await makeStorePath();
|
||||
|
||||
@@ -341,6 +341,10 @@ export async function run(state: CronServiceState, id: string, mode?: "due" | "f
|
||||
const prepared = await locked(state, async () => {
|
||||
warnIfDisabled(state, "run");
|
||||
await ensureLoaded(state, { skipRecompute: true });
|
||||
// Normalize job tick state (clears stale runningAtMs markers) before
|
||||
// checking if already running, so a stale marker from a crashed Phase-1
|
||||
// persist does not block manual triggers for up to STUCK_RUN_MS (#17554).
|
||||
recomputeNextRunsForMaintenance(state);
|
||||
const job = findJobOrThrow(state, id);
|
||||
if (typeof job.state.runningAtMs === "number") {
|
||||
return { ok: true, ran: false, reason: "already-running" as const };
|
||||
|
||||
Reference in New Issue
Block a user