* fix: detect PID recycling in session write lock staleness check The session lock uses isPidAlive() to determine if a lock holder is still running. In containers, PID recycling can cause a different process to inherit the same PID, making the lock appear valid when the original holder is dead. Record the process start time (field 22 of /proc/pid/stat) in the lock file and compare it during staleness checks. If the PID is alive but its start time differs from the recorded value, the lock is treated as stale and reclaimed immediately. Backward compatible: lock files without starttime are handled with the existing PID-alive + age-based logic. Non-Linux platforms skip the starttime check entirely (getProcessStartTime returns null). * shared: harden pid starttime parsing * sessions: validate lock pid/starttime payloads * changelog: note recycled PID lock recovery fix * changelog: credit hiroki and vincent on lock recovery fix --------- Co-authored-by: HirokiKobayashi-R <hiroki@rhems-japan.co.jp>
71 lines
2.0 KiB
TypeScript
71 lines
2.0 KiB
TypeScript
import fsSync from "node:fs";
|
|
|
|
/**
 * Tell whether `pid` is usable as a process id: a whole number that is
 * strictly positive. NaN, infinities, fractions, zero and negatives fail.
 */
function isValidPid(pid: number): boolean {
  if (!Number.isInteger(pid)) {
    return false;
  }
  return pid > 0;
}
|
|
|
|
/**
 * Report whether the process `pid` is a zombie, judged by the first
 * character of the "State:" line in /proc/<pid>/status on Linux.
 *
 * The answer is false on every other platform, and also when the status
 * file cannot be read or has no "State:" line.
 */
function isZombieProcess(pid: number): boolean {
  if (process.platform === "linux") {
    try {
      const procStatus = fsSync.readFileSync(`/proc/${pid}/status`, "utf8");
      const stateLine = /^State:\s+(\S)/m.exec(procStatus);
      return stateLine !== null && stateLine[1] === "Z";
    } catch {
      // Unreadable proc entry: fall through to the "not a zombie" answer.
    }
  }
  return false;
}
|
|
|
|
/**
 * Check whether a process with the given PID is currently running.
 *
 * Returns false for invalid PIDs (non-integers, zero, negatives), for PIDs
 * that cannot be signalled because no such process exists, and for zombie
 * processes (detected on Linux only).
 *
 * A process that exists but belongs to another user makes the signal probe
 * fail with EPERM; that case is treated as alive rather than dead.
 *
 * @param pid - Process id to probe.
 * @returns true if the process exists and is not a zombie.
 */
export function isPidAlive(pid: number): boolean {
  if (!isValidPid(pid)) {
    return false;
  }
  try {
    // Signal 0 performs the existence/permission check without delivering
    // an actual signal to the target.
    process.kill(pid, 0);
  } catch (err) {
    // EPERM: the process exists, we just lack permission to signal it.
    // Any other failure (e.g. ESRCH) means there is no such process.
    const code =
      typeof err === "object" && err !== null
        ? (err as { code?: unknown }).code
        : undefined;
    if (code !== "EPERM") {
      return false;
    }
  }
  return !isZombieProcess(pid);
}
|
|
|
|
/**
 * Read the process start time (field 22 "starttime") from /proc/<pid>/stat.
 * Returns the value in clock ticks since system boot, or null on non-Linux
 * platforms, for invalid PIDs, or if the proc file can't be read or parsed.
 *
 * This is used to detect PID recycling: if two readings for the same PID
 * return different starttimes, the PID has been reused by a different process.
 *
 * @param pid - Process id whose start time should be read.
 * @returns The start time as a non-negative safe integer, or null.
 */
export function getProcessStartTime(pid: number): number | null {
  if (process.platform !== "linux") {
    return null;
  }
  if (!isValidPid(pid)) {
    return null;
  }
  try {
    const stat = fsSync.readFileSync(`/proc/${pid}/stat`, "utf8");
    // The comm field (field 2) is wrapped in parens and can itself contain
    // spaces and parens, so everything up to the LAST ")" is skipped to reach
    // fields 3..N reliably.
    const commEndIndex = stat.lastIndexOf(")");
    if (commEndIndex < 0) {
      return null;
    }
    const fields = stat.slice(commEndIndex + 1).trimStart().split(/\s+/);
    // field 22 (starttime) = index 19 after the comm-split (field 3 is index 0).
    const rawStartTime = fields[19];
    // Require a plain decimal token. Number("") is 0 and Number() also accepts
    // hex/exponent forms, so a truncated or malformed line must not be
    // mistaken for a real start time.
    if (rawStartTime === undefined || !/^\d+$/.test(rawStartTime)) {
      return null;
    }
    const starttime = Number(rawStartTime);
    // Values beyond safe-integer precision cannot be compared for equality
    // reliably, so they are reported as unavailable.
    return Number.isSafeInteger(starttime) ? starttime : null;
  } catch {
    return null;
  }
}
|