Files
Moltbot/src/agents/tool-loop-detection.ts
2026-02-16 23:27:35 +00:00

624 lines
18 KiB
TypeScript

import { createHash } from "node:crypto";
import type { ToolLoopDetectionConfig } from "../config/types.tools.js";
import type { SessionState } from "../logging/diagnostic-session-state.js";
import { createSubsystemLogger } from "../logging/subsystem.js";
import { isPlainObject } from "../utils.js";
const log = createSubsystemLogger("agents/loop-detection");
export type LoopDetectorKind =
| "generic_repeat"
| "known_poll_no_progress"
| "global_circuit_breaker"
| "ping_pong";
export type LoopDetectionResult =
| { stuck: false }
| {
stuck: true;
level: "warning" | "critical";
detector: LoopDetectorKind;
count: number;
message: string;
pairedToolName?: string;
warningKey?: string;
};
export const TOOL_CALL_HISTORY_SIZE = 30;
export const WARNING_THRESHOLD = 10;
export const CRITICAL_THRESHOLD = 20;
export const GLOBAL_CIRCUIT_BREAKER_THRESHOLD = 30;
const DEFAULT_LOOP_DETECTION_CONFIG = {
enabled: false,
historySize: TOOL_CALL_HISTORY_SIZE,
warningThreshold: WARNING_THRESHOLD,
criticalThreshold: CRITICAL_THRESHOLD,
globalCircuitBreakerThreshold: GLOBAL_CIRCUIT_BREAKER_THRESHOLD,
detectors: {
genericRepeat: true,
knownPollNoProgress: true,
pingPong: true,
},
};
type ResolvedLoopDetectionConfig = {
enabled: boolean;
historySize: number;
warningThreshold: number;
criticalThreshold: number;
globalCircuitBreakerThreshold: number;
detectors: {
genericRepeat: boolean;
knownPollNoProgress: boolean;
pingPong: boolean;
};
};
function asPositiveInt(value: number | undefined, fallback: number): number {
if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) {
return fallback;
}
return value;
}
function resolveLoopDetectionConfig(config?: ToolLoopDetectionConfig): ResolvedLoopDetectionConfig {
let warningThreshold = asPositiveInt(
config?.warningThreshold,
DEFAULT_LOOP_DETECTION_CONFIG.warningThreshold,
);
let criticalThreshold = asPositiveInt(
config?.criticalThreshold,
DEFAULT_LOOP_DETECTION_CONFIG.criticalThreshold,
);
let globalCircuitBreakerThreshold = asPositiveInt(
config?.globalCircuitBreakerThreshold,
DEFAULT_LOOP_DETECTION_CONFIG.globalCircuitBreakerThreshold,
);
if (criticalThreshold <= warningThreshold) {
criticalThreshold = warningThreshold + 1;
}
if (globalCircuitBreakerThreshold <= criticalThreshold) {
globalCircuitBreakerThreshold = criticalThreshold + 1;
}
return {
enabled: config?.enabled ?? DEFAULT_LOOP_DETECTION_CONFIG.enabled,
historySize: asPositiveInt(config?.historySize, DEFAULT_LOOP_DETECTION_CONFIG.historySize),
warningThreshold,
criticalThreshold,
globalCircuitBreakerThreshold,
detectors: {
genericRepeat:
config?.detectors?.genericRepeat ?? DEFAULT_LOOP_DETECTION_CONFIG.detectors.genericRepeat,
knownPollNoProgress:
config?.detectors?.knownPollNoProgress ??
DEFAULT_LOOP_DETECTION_CONFIG.detectors.knownPollNoProgress,
pingPong: config?.detectors?.pingPong ?? DEFAULT_LOOP_DETECTION_CONFIG.detectors.pingPong,
},
};
}
/**
* Hash a tool call for pattern matching.
* Uses tool name + deterministic JSON serialization digest of params.
*/
export function hashToolCall(toolName: string, params: unknown): string {
return `${toolName}:${digestStable(params)}`;
}
function stableStringify(value: unknown): string {
if (value === null || typeof value !== "object") {
return JSON.stringify(value);
}
if (Array.isArray(value)) {
return `[${value.map(stableStringify).join(",")}]`;
}
const obj = value as Record<string, unknown>;
const keys = Object.keys(obj).toSorted();
return `{${keys.map((k) => `${JSON.stringify(k)}:${stableStringify(obj[k])}`).join(",")}}`;
}
function digestStable(value: unknown): string {
const serialized = stableStringifyFallback(value);
return createHash("sha256").update(serialized).digest("hex");
}
function stableStringifyFallback(value: unknown): string {
try {
return stableStringify(value);
} catch {
if (value === null || value === undefined) {
return `${value}`;
}
if (typeof value === "string") {
return value;
}
if (typeof value === "number" || typeof value === "boolean" || typeof value === "bigint") {
return `${value}`;
}
if (value instanceof Error) {
return `${value.name}:${value.message}`;
}
return Object.prototype.toString.call(value);
}
}
function isKnownPollToolCall(toolName: string, params: unknown): boolean {
if (toolName === "command_status") {
return true;
}
if (toolName !== "process" || !isPlainObject(params)) {
return false;
}
const action = params.action;
return action === "poll" || action === "log";
}
function extractTextContent(result: unknown): string {
if (!isPlainObject(result) || !Array.isArray(result.content)) {
return "";
}
return result.content
.filter(
(entry): entry is { type: string; text: string } =>
isPlainObject(entry) && typeof entry.type === "string" && typeof entry.text === "string",
)
.map((entry) => entry.text)
.join("\n")
.trim();
}
function formatErrorForHash(error: unknown): string {
if (error instanceof Error) {
return error.message || error.name;
}
if (typeof error === "string") {
return error;
}
if (typeof error === "number" || typeof error === "boolean" || typeof error === "bigint") {
return `${error}`;
}
return stableStringify(error);
}
function hashToolOutcome(
toolName: string,
params: unknown,
result: unknown,
error: unknown,
): string | undefined {
if (error !== undefined) {
return `error:${digestStable(formatErrorForHash(error))}`;
}
if (!isPlainObject(result)) {
return result === undefined ? undefined : digestStable(result);
}
const details = isPlainObject(result.details) ? result.details : {};
const text = extractTextContent(result);
if (isKnownPollToolCall(toolName, params) && toolName === "process" && isPlainObject(params)) {
const action = params.action;
if (action === "poll") {
return digestStable({
action,
status: details.status,
exitCode: details.exitCode ?? null,
exitSignal: details.exitSignal ?? null,
aggregated: details.aggregated ?? null,
text,
});
}
if (action === "log") {
return digestStable({
action,
status: details.status,
totalLines: details.totalLines ?? null,
totalChars: details.totalChars ?? null,
truncated: details.truncated ?? null,
exitCode: details.exitCode ?? null,
exitSignal: details.exitSignal ?? null,
text,
});
}
}
return digestStable({
details,
text,
});
}
function getNoProgressStreak(
history: Array<{ toolName: string; argsHash: string; resultHash?: string }>,
toolName: string,
argsHash: string,
): { count: number; latestResultHash?: string } {
let streak = 0;
let latestResultHash: string | undefined;
for (let i = history.length - 1; i >= 0; i -= 1) {
const record = history[i];
if (!record || record.toolName !== toolName || record.argsHash !== argsHash) {
continue;
}
if (typeof record.resultHash !== "string" || !record.resultHash) {
continue;
}
if (!latestResultHash) {
latestResultHash = record.resultHash;
streak = 1;
continue;
}
if (record.resultHash !== latestResultHash) {
break;
}
streak += 1;
}
return { count: streak, latestResultHash };
}
function getPingPongStreak(
history: Array<{ toolName: string; argsHash: string; resultHash?: string }>,
currentSignature: string,
): {
count: number;
pairedToolName?: string;
pairedSignature?: string;
noProgressEvidence: boolean;
} {
const last = history.at(-1);
if (!last) {
return { count: 0, noProgressEvidence: false };
}
let otherSignature: string | undefined;
let otherToolName: string | undefined;
for (let i = history.length - 2; i >= 0; i -= 1) {
const call = history[i];
if (!call) {
continue;
}
if (call.argsHash !== last.argsHash) {
otherSignature = call.argsHash;
otherToolName = call.toolName;
break;
}
}
if (!otherSignature || !otherToolName) {
return { count: 0, noProgressEvidence: false };
}
let alternatingTailCount = 0;
for (let i = history.length - 1; i >= 0; i -= 1) {
const call = history[i];
if (!call) {
continue;
}
const expected = alternatingTailCount % 2 === 0 ? last.argsHash : otherSignature;
if (call.argsHash !== expected) {
break;
}
alternatingTailCount += 1;
}
if (alternatingTailCount < 2) {
return { count: 0, noProgressEvidence: false };
}
const expectedCurrentSignature = otherSignature;
if (currentSignature !== expectedCurrentSignature) {
return { count: 0, noProgressEvidence: false };
}
const tailStart = Math.max(0, history.length - alternatingTailCount);
let firstHashA: string | undefined;
let firstHashB: string | undefined;
let noProgressEvidence = true;
for (let i = tailStart; i < history.length; i += 1) {
const call = history[i];
if (!call) {
continue;
}
if (!call.resultHash) {
noProgressEvidence = false;
break;
}
if (call.argsHash === last.argsHash) {
if (!firstHashA) {
firstHashA = call.resultHash;
} else if (firstHashA !== call.resultHash) {
noProgressEvidence = false;
break;
}
continue;
}
if (call.argsHash === otherSignature) {
if (!firstHashB) {
firstHashB = call.resultHash;
} else if (firstHashB !== call.resultHash) {
noProgressEvidence = false;
break;
}
continue;
}
noProgressEvidence = false;
break;
}
// Need repeated stable outcomes on both sides before treating ping-pong as no-progress.
if (!firstHashA || !firstHashB) {
noProgressEvidence = false;
}
return {
count: alternatingTailCount + 1,
pairedToolName: last.toolName,
pairedSignature: last.argsHash,
noProgressEvidence,
};
}
function canonicalPairKey(signatureA: string, signatureB: string): string {
return [signatureA, signatureB].toSorted().join("|");
}
/**
* Detect if an agent is stuck in a repetitive tool call loop.
* Checks if the same tool+params combination has been called excessively.
*/
export function detectToolCallLoop(
state: SessionState,
toolName: string,
params: unknown,
config?: ToolLoopDetectionConfig,
): LoopDetectionResult {
const resolvedConfig = resolveLoopDetectionConfig(config);
if (!resolvedConfig.enabled) {
return { stuck: false };
}
const history = state.toolCallHistory ?? [];
const currentHash = hashToolCall(toolName, params);
const noProgress = getNoProgressStreak(history, toolName, currentHash);
const noProgressStreak = noProgress.count;
const knownPollTool = isKnownPollToolCall(toolName, params);
const pingPong = getPingPongStreak(history, currentHash);
if (noProgressStreak >= resolvedConfig.globalCircuitBreakerThreshold) {
log.error(
`Global circuit breaker triggered: ${toolName} repeated ${noProgressStreak} times with no progress`,
);
return {
stuck: true,
level: "critical",
detector: "global_circuit_breaker",
count: noProgressStreak,
message: `CRITICAL: ${toolName} has repeated identical no-progress outcomes ${noProgressStreak} times. Session execution blocked by global circuit breaker to prevent runaway loops.`,
warningKey: `global:${toolName}:${currentHash}:${noProgress.latestResultHash ?? "none"}`,
};
}
if (
knownPollTool &&
resolvedConfig.detectors.knownPollNoProgress &&
noProgressStreak >= resolvedConfig.criticalThreshold
) {
log.error(`Critical polling loop detected: ${toolName} repeated ${noProgressStreak} times`);
return {
stuck: true,
level: "critical",
detector: "known_poll_no_progress",
count: noProgressStreak,
message: `CRITICAL: Called ${toolName} with identical arguments and no progress ${noProgressStreak} times. This appears to be a stuck polling loop. Session execution blocked to prevent resource waste.`,
warningKey: `poll:${toolName}:${currentHash}:${noProgress.latestResultHash ?? "none"}`,
};
}
if (
knownPollTool &&
resolvedConfig.detectors.knownPollNoProgress &&
noProgressStreak >= resolvedConfig.warningThreshold
) {
log.warn(`Polling loop warning: ${toolName} repeated ${noProgressStreak} times`);
return {
stuck: true,
level: "warning",
detector: "known_poll_no_progress",
count: noProgressStreak,
message: `WARNING: You have called ${toolName} ${noProgressStreak} times with identical arguments and no progress. Stop polling and either (1) increase wait time between checks, or (2) report the task as failed if the process is stuck.`,
warningKey: `poll:${toolName}:${currentHash}:${noProgress.latestResultHash ?? "none"}`,
};
}
const pingPongWarningKey = pingPong.pairedSignature
? `pingpong:${canonicalPairKey(currentHash, pingPong.pairedSignature)}`
: `pingpong:${toolName}:${currentHash}`;
if (
resolvedConfig.detectors.pingPong &&
pingPong.count >= resolvedConfig.criticalThreshold &&
pingPong.noProgressEvidence
) {
log.error(
`Critical ping-pong loop detected: alternating calls count=${pingPong.count} currentTool=${toolName}`,
);
return {
stuck: true,
level: "critical",
detector: "ping_pong",
count: pingPong.count,
message: `CRITICAL: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls) with no progress. This appears to be a stuck ping-pong loop. Session execution blocked to prevent resource waste.`,
pairedToolName: pingPong.pairedToolName,
warningKey: pingPongWarningKey,
};
}
if (resolvedConfig.detectors.pingPong && pingPong.count >= resolvedConfig.warningThreshold) {
log.warn(
`Ping-pong loop warning: alternating calls count=${pingPong.count} currentTool=${toolName}`,
);
return {
stuck: true,
level: "warning",
detector: "ping_pong",
count: pingPong.count,
message: `WARNING: You are alternating between repeated tool-call patterns (${pingPong.count} consecutive calls). This looks like a ping-pong loop; stop retrying and report the task as failed.`,
pairedToolName: pingPong.pairedToolName,
warningKey: pingPongWarningKey,
};
}
// Generic detector: warn-only for repeated identical calls.
const recentCount = history.filter(
(h) => h.toolName === toolName && h.argsHash === currentHash,
).length;
if (
!knownPollTool &&
resolvedConfig.detectors.genericRepeat &&
recentCount >= resolvedConfig.warningThreshold
) {
log.warn(`Loop warning: ${toolName} called ${recentCount} times with identical arguments`);
return {
stuck: true,
level: "warning",
detector: "generic_repeat",
count: recentCount,
message: `WARNING: You have called ${toolName} ${recentCount} times with identical arguments. If this is not making progress, stop retrying and report the task as failed.`,
warningKey: `generic:${toolName}:${currentHash}`,
};
}
return { stuck: false };
}
/**
* Record a tool call in the session's history for loop detection.
* Maintains sliding window of last N calls.
*/
export function recordToolCall(
state: SessionState,
toolName: string,
params: unknown,
toolCallId?: string,
config?: ToolLoopDetectionConfig,
): void {
const resolvedConfig = resolveLoopDetectionConfig(config);
if (!state.toolCallHistory) {
state.toolCallHistory = [];
}
state.toolCallHistory.push({
toolName,
argsHash: hashToolCall(toolName, params),
toolCallId,
timestamp: Date.now(),
});
if (state.toolCallHistory.length > resolvedConfig.historySize) {
state.toolCallHistory.shift();
}
}
/**
* Record a completed tool call outcome so loop detection can identify no-progress repeats.
*/
export function recordToolCallOutcome(
state: SessionState,
params: {
toolName: string;
toolParams: unknown;
toolCallId?: string;
result?: unknown;
error?: unknown;
config?: ToolLoopDetectionConfig;
},
): void {
const resolvedConfig = resolveLoopDetectionConfig(params.config);
const resultHash = hashToolOutcome(
params.toolName,
params.toolParams,
params.result,
params.error,
);
if (!resultHash) {
return;
}
if (!state.toolCallHistory) {
state.toolCallHistory = [];
}
const argsHash = hashToolCall(params.toolName, params.toolParams);
let matched = false;
for (let i = state.toolCallHistory.length - 1; i >= 0; i -= 1) {
const call = state.toolCallHistory[i];
if (!call) {
continue;
}
if (params.toolCallId && call.toolCallId !== params.toolCallId) {
continue;
}
if (call.toolName !== params.toolName || call.argsHash !== argsHash) {
continue;
}
if (call.resultHash !== undefined) {
continue;
}
call.resultHash = resultHash;
matched = true;
break;
}
if (!matched) {
state.toolCallHistory.push({
toolName: params.toolName,
argsHash,
toolCallId: params.toolCallId,
resultHash,
timestamp: Date.now(),
});
}
if (state.toolCallHistory.length > resolvedConfig.historySize) {
state.toolCallHistory.splice(0, state.toolCallHistory.length - resolvedConfig.historySize);
}
}
/**
* Get current tool call statistics for a session (for debugging/monitoring).
*/
export function getToolCallStats(state: SessionState): {
totalCalls: number;
uniquePatterns: number;
mostFrequent: { toolName: string; count: number } | null;
} {
const history = state.toolCallHistory ?? [];
const patterns = new Map<string, { toolName: string; count: number }>();
for (const call of history) {
const key = call.argsHash;
const existing = patterns.get(key);
if (existing) {
existing.count += 1;
} else {
patterns.set(key, { toolName: call.toolName, count: 1 });
}
}
let mostFrequent: { toolName: string; count: number } | null = null;
for (const pattern of patterns.values()) {
if (!mostFrequent || pattern.count > mostFrequent.count) {
mostFrequent = pattern;
}
}
return {
totalCalls: history.length,
uniquePatterns: patterns.size,
mostFrequent,
};
}