From 4985c561dfc4be6362fa7a800f98da8ef3d9f0a4 Mon Sep 17 00:00:00 2001 From: bmendonca3 Date: Mon, 2 Mar 2026 12:20:03 -0700 Subject: [PATCH] sessions: reclaim orphan self-pid lock files --- src/agents/session-write-lock.test.ts | 52 +++++++++++++++++++++------ src/agents/session-write-lock.ts | 30 +++++++++++++++- 2 files changed, 71 insertions(+), 11 deletions(-) diff --git a/src/agents/session-write-lock.test.ts b/src/agents/session-write-lock.test.ts index 103d76293..665bdddc3 100644 --- a/src/agents/session-write-lock.test.ts +++ b/src/agents/session-write-lock.test.ts @@ -300,13 +300,13 @@ describe("acquireSessionWriteLock", () => { } }); - it("does not reclaim lock files without starttime (backward compat)", async () => { + it("reclaims orphan lock files without starttime when PID matches current process", async () => { const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-")); try { const sessionFile = path.join(root, "sessions.json"); const lockPath = `${sessionFile}.lock`; - // Old-format lock without starttime — should NOT be reclaimed just because - // starttime is missing. The PID is alive, so the lock is valid. + // Simulate an old-format lock file left behind by a previous process + // instance that reused the same PID (common in containers). await fs.writeFile( lockPath, JSON.stringify({ @@ -316,19 +316,46 @@ describe("acquireSessionWriteLock", () => { "utf8", ); - await expect(acquireSessionWriteLock({ sessionFile, timeoutMs: 50 })).rejects.toThrow( - /session file locked/, - ); + await expectCurrentPidOwnsLock({ sessionFile, timeoutMs: 500 }); } finally { await fs.rm(root, { recursive: true, force: true }); } }); - it("does not treat malformed starttime as recycled", async () => { + it("does not reclaim active in-process lock files without starttime", async () => { const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-")); try { const sessionFile = path.join(root, "sessions.json"); const lockPath = `${sessionFile}.lock`; + const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); + await fs.writeFile( + lockPath, + JSON.stringify({ + pid: process.pid, + createdAt: new Date().toISOString(), + }), + "utf8", + ); + + await expect( + acquireSessionWriteLock({ + sessionFile, + timeoutMs: 50, + allowReentrant: false, + }), + ).rejects.toThrow(/session file locked/); + await lock.release(); + } finally { + await fs.rm(root, { recursive: true, force: true }); + } + }); + + it("does not reclaim active in-process lock files with malformed starttime", async () => { + const root = await fs.mkdtemp(path.join(os.tmpdir(), "openclaw-lock-")); + try { + const sessionFile = path.join(root, "sessions.json"); + const lockPath = `${sessionFile}.lock`; + const lock = await acquireSessionWriteLock({ sessionFile, timeoutMs: 500 }); await fs.writeFile( lockPath, JSON.stringify({ @@ -339,9 +366,14 @@ describe("acquireSessionWriteLock", () => { "utf8", ); - await expect(acquireSessionWriteLock({ sessionFile, timeoutMs: 50 })).rejects.toThrow( - /session file locked/, - ); + await expect( + acquireSessionWriteLock({ + sessionFile, + timeoutMs: 50, + allowReentrant: false, + }), + ).rejects.toThrow(/session file locked/); + await lock.release(); } finally { await fs.rm(root, { recursive: true, force: true }); } diff --git a/src/agents/session-write-lock.ts b/src/agents/session-write-lock.ts index 837a7ada3..5f2cfb6fc 100644 --- a/src/agents/session-write-lock.ts +++ b/src/agents/session-write-lock.ts @@ -369,6 +369,21 @@ async function shouldReclaimContendedLockFile( } } +function shouldTreatAsOrphanSelfLock(params: { + payload: LockFilePayload | null; + normalizedSessionFile: string; +}): boolean { + const pid = isValidLockNumber(params.payload?.pid) ? params.payload.pid : null; + if (pid !== process.pid) { + return false; + } + const hasValidStarttime = isValidLockNumber(params.payload?.starttime); + if (hasValidStarttime) { + return false; + } + return !HELD_LOCKS.has(params.normalizedSessionFile); +} + export async function cleanStaleLockFiles(params: { sessionsDir: string; staleMs?: number; @@ -509,7 +524,20 @@ export async function acquireSessionWriteLock(params: { const payload = await readLockPayload(lockPath); const nowMs = Date.now(); const inspected = inspectLockPayload(payload, staleMs, nowMs); - if (await shouldReclaimContendedLockFile(lockPath, inspected, staleMs, nowMs)) { + const orphanSelfLock = shouldTreatAsOrphanSelfLock({ + payload, + normalizedSessionFile, + }); + const reclaimDetails = orphanSelfLock + ? { + ...inspected, + stale: true, + staleReasons: inspected.staleReasons.includes("orphan-self-pid") + ? inspected.staleReasons + : [...inspected.staleReasons, "orphan-self-pid"], + } + : inspected; + if (await shouldReclaimContendedLockFile(lockPath, reclaimDetails, staleMs, nowMs)) { await fs.rm(lockPath, { force: true }); continue; }