Files
Moltbot/src/shared/pid-alive.ts
Vincent Koc 5a2200b280 fix(sessions): harden recycled PID lock recovery follow-up (#31320)
* fix: detect PID recycling in session write lock staleness check

The session lock uses isPidAlive() to determine if a lock holder is
still running. In containers, PID recycling can cause a different
process to inherit the same PID, making the lock appear valid when
the original holder is dead.

Record the process start time (field 22 of /proc/pid/stat) in the
lock file and compare it during staleness checks. If the PID is alive
but its start time differs from the recorded value, the lock is
treated as stale and reclaimed immediately.

Backward compatible: lock files without starttime are handled with
the existing PID-alive + age-based logic. Non-Linux platforms skip
the starttime check entirely (getProcessStartTime returns null).

* shared: harden pid starttime parsing

* sessions: validate lock pid/starttime payloads

* changelog: note recycled PID lock recovery fix

* changelog: credit hiroki and vincent on lock recovery fix

---------

Co-authored-by: HirokiKobayashi-R <hiroki@rhems-japan.co.jp>
2026-03-01 21:42:22 -08:00

71 lines
2.0 KiB
TypeScript

import fsSync from "node:fs";
/**
 * Tell whether a value is usable as a process ID.
 *
 * @param pid - Candidate process ID.
 * @returns true only for integers greater than or equal to 1; NaN,
 * infinities, fractions, zero and negative numbers are all rejected.
 */
function isValidPid(pid: number): boolean {
  if (!Number.isInteger(pid)) {
    return false;
  }
  return pid >= 1;
}
/**
 * Report whether a Linux process is in the zombie state.
 *
 * The state letter is taken from the "State:" line of /proc/<pid>/status.
 *
 * @param pid - Process ID to inspect.
 * @returns true when the state letter is "Z"; false on non-Linux platforms,
 * when the proc file cannot be read, or when no "State:" line is found.
 */
function isZombieProcess(pid: number): boolean {
  const onLinux = process.platform === "linux";
  if (!onLinux) {
    return false;
  }

  let procStatus: string;
  try {
    procStatus = fsSync.readFileSync(`/proc/${pid}/status`, "utf8");
  } catch {
    // Unreadable or missing proc entry: cannot be classified as a zombie.
    return false;
  }

  // Capture the single state letter that follows "State:".
  const parsed = /^State:\s+(\S)/m.exec(procStatus);
  return parsed !== null && parsed[1] === "Z";
}
/**
 * Check whether a process with the given PID currently exists and is not a
 * zombie.
 *
 * The probe sends signal 0, which performs the existence and permission
 * checks without delivering a signal.
 *
 * @param pid - Process ID to probe.
 * @returns false for an invalid PID, for a PID that cannot be signalled for
 * any reason other than missing permission, or for a process that
 * isZombieProcess() reports as a zombie; true otherwise.
 */
export function isPidAlive(pid: number): boolean {
  if (!isValidPid(pid)) {
    return false;
  }
  try {
    process.kill(pid, 0);
  } catch (err: unknown) {
    // EPERM means the process exists but we are not allowed to signal it
    // (e.g. it belongs to another user). Reporting it as dead would let a
    // caller reclaim a resource that is still held, so only other errors
    // (such as ESRCH) count as "not running".
    const code =
      typeof err === "object" && err !== null && "code" in err
        ? (err as { code?: unknown }).code
        : undefined;
    if (code !== "EPERM") {
      return false;
    }
  }
  // A zombie still occupies its PID but has already exited.
  return !isZombieProcess(pid);
}
/**
 * Read the process start time (field 22 "starttime") from /proc/<pid>/stat.
 *
 * This is used to detect PID recycling: if two readings for the same PID
 * return different starttimes, the PID has been reused by a different process.
 *
 * @param pid - Process ID to inspect.
 * @returns The start time in clock ticks since system boot, or null on
 * non-Linux platforms, for an invalid PID, when the proc file can't be read,
 * or when the starttime field is missing or is not a plain decimal integer
 * that fits in a safe JavaScript integer.
 */
export function getProcessStartTime(pid: number): number | null {
  if (process.platform !== "linux") {
    return null;
  }
  if (!isValidPid(pid)) {
    return null;
  }
  try {
    const stat = fsSync.readFileSync(`/proc/${pid}/stat`, "utf8");
    const commEndIndex = stat.lastIndexOf(")");
    if (commEndIndex < 0) {
      return null;
    }
    // The comm field (field 2) is wrapped in parens and can contain spaces,
    // so split after the last ")" to get fields 3..N reliably.
    const afterComm = stat.slice(commEndIndex + 1).trimStart();
    const fields = afterComm.split(/\s+/);
    // field 22 (starttime) = index 19 after the comm-split (field 3 is index 0).
    const rawStarttime = fields[19];
    // Number() alone is too lenient: "" coerces to 0 (a truncated line would
    // yield a bogus start time) and hex/exponent forms are accepted. Require
    // a plain run of decimal digits before converting.
    if (rawStarttime === undefined || !/^\d+$/.test(rawStarttime)) {
      return null;
    }
    const starttime = Number(rawStarttime);
    // Beyond 2^53 distinct tick counts could collapse to the same double.
    return Number.isSafeInteger(starttime) ? starttime : null;
  } catch {
    return null;
  }
}