import { describe, expect, it } from "vitest"; import type { ToolLoopDetectionConfig } from "../config/types.tools.js"; import type { SessionState } from "../logging/diagnostic-session-state.js"; import { CRITICAL_THRESHOLD, GLOBAL_CIRCUIT_BREAKER_THRESHOLD, TOOL_CALL_HISTORY_SIZE, WARNING_THRESHOLD, detectToolCallLoop, getToolCallStats, hashToolCall, recordToolCall, recordToolCallOutcome, } from "./tool-loop-detection.js"; function createState(): SessionState { return { lastActivity: Date.now(), state: "processing", queueDepth: 0, }; } const enabledLoopDetectionConfig: ToolLoopDetectionConfig = { enabled: true }; const shortHistoryLoopConfig: ToolLoopDetectionConfig = { enabled: true, historySize: 4, }; function recordSuccessfulCall( state: SessionState, toolName: string, params: unknown, result: unknown, index: number, ): void { const toolCallId = `${toolName}-${index}`; recordToolCall(state, toolName, params, toolCallId); recordToolCallOutcome(state, { toolName, toolParams: params, toolCallId, result, }); } function recordRepeatedSuccessfulCalls(params: { state: SessionState; toolName: string; toolParams: unknown; result: unknown; count: number; startIndex?: number; }) { const startIndex = params.startIndex ?? 0; for (let i = 0; i < params.count; i += 1) { recordSuccessfulCall( params.state, params.toolName, params.toolParams, params.result, startIndex + i, ); } } function createNoProgressPollFixture(sessionId: string) { return { params: { action: "poll", sessionId }, result: { content: [{ type: "text", text: "(no new output)\n\nProcess still running." }], details: { status: "running", aggregated: "steady" }, }, }; } function createReadNoProgressFixture() { return { toolName: "read", params: { path: "/same.txt" }, result: { content: [{ type: "text", text: "same output" }], details: { ok: true }, }, } as const; } function createPingPongFixture() { return { state: createState(), readParams: { path: "/a.txt" }, listParams: { dir: "/workspace" }, }; } function detectLoopAfterRepeatedCalls(params: { toolName: string; toolParams: unknown; result: unknown; count: number; config?: ToolLoopDetectionConfig; }) { const state = createState(); recordRepeatedSuccessfulCalls({ state, toolName: params.toolName, toolParams: params.toolParams, result: params.result, count: params.count, }); return detectToolCallLoop( state, params.toolName, params.toolParams, params.config ?? enabledLoopDetectionConfig, ); } function recordSuccessfulPingPongCalls(params: { state: SessionState; readParams: { path: string }; listParams: { dir: string }; count: number; textAtIndex: (toolName: "read" | "list", index: number) => string; }) { for (let i = 0; i < params.count; i += 1) { if (i % 2 === 0) { recordSuccessfulCall( params.state, "read", params.readParams, { content: [{ type: "text", text: params.textAtIndex("read", i) }], details: { ok: true } }, i, ); } else { recordSuccessfulCall( params.state, "list", params.listParams, { content: [{ type: "text", text: params.textAtIndex("list", i) }], details: { ok: true } }, i, ); } } } function expectPingPongLoop( loopResult: ReturnType, expected: { level: "warning" | "critical"; count: number; expectCriticalText?: boolean }, ) { expect(loopResult.stuck).toBe(true); if (!loopResult.stuck) { return; } expect(loopResult.level).toBe(expected.level); expect(loopResult.detector).toBe("ping_pong"); expect(loopResult.count).toBe(expected.count); if (expected.expectCriticalText) { expect(loopResult.message).toContain("CRITICAL"); } } describe("tool-loop-detection", () => { describe("hashToolCall", () => { it("creates consistent hash for same tool and params", () => { const hash1 = hashToolCall("read", { path: "/file.txt" }); const hash2 = hashToolCall("read", { path: "/file.txt" }); expect(hash1).toBe(hash2); }); it("creates different hashes for different params", () => { const hash1 = hashToolCall("read", { path: "/file1.txt" }); const hash2 = hashToolCall("read", { path: "/file2.txt" }); expect(hash1).not.toBe(hash2); }); it("creates different hashes for different tools", () => { const hash1 = hashToolCall("read", { path: "/file.txt" }); const hash2 = hashToolCall("write", { path: "/file.txt" }); expect(hash1).not.toBe(hash2); }); it("handles non-object params", () => { expect(() => hashToolCall("tool", "string-param")).not.toThrow(); expect(() => hashToolCall("tool", 123)).not.toThrow(); expect(() => hashToolCall("tool", null)).not.toThrow(); }); it("produces deterministic hashes regardless of key order", () => { const hash1 = hashToolCall("tool", { a: 1, b: 2 }); const hash2 = hashToolCall("tool", { b: 2, a: 1 }); expect(hash1).toBe(hash2); }); it("keeps hashes fixed-size even for large params", () => { const payload = { data: "x".repeat(20_000) }; const hash = hashToolCall("read", payload); expect(hash.startsWith("read:")).toBe(true); expect(hash.length).toBe("read:".length + 64); }); }); describe("recordToolCall", () => { it("adds tool call to empty history", () => { const state = createState(); recordToolCall(state, "read", { path: "/file.txt" }, "call-1"); expect(state.toolCallHistory).toHaveLength(1); expect(state.toolCallHistory?.[0]?.toolName).toBe("read"); expect(state.toolCallHistory?.[0]?.toolCallId).toBe("call-1"); }); it("maintains sliding window of last N calls", () => { const state = createState(); for (let i = 0; i < TOOL_CALL_HISTORY_SIZE + 10; i += 1) { recordToolCall(state, "tool", { iteration: i }, `call-${i}`); } expect(state.toolCallHistory).toHaveLength(TOOL_CALL_HISTORY_SIZE); const oldestCall = state.toolCallHistory?.[0]; expect(oldestCall?.argsHash).toBe(hashToolCall("tool", { iteration: 10 })); }); it("records timestamp for each call", () => { const state = createState(); const before = Date.now(); recordToolCall(state, "tool", { arg: 1 }, "call-ts"); const after = Date.now(); const timestamp = state.toolCallHistory?.[0]?.timestamp ?? 0; expect(timestamp).toBeGreaterThanOrEqual(before); expect(timestamp).toBeLessThanOrEqual(after); }); it("respects configured historySize", () => { const state = createState(); for (let i = 0; i < 10; i += 1) { recordToolCall(state, "tool", { iteration: i }, `call-${i}`, shortHistoryLoopConfig); } expect(state.toolCallHistory).toHaveLength(4); expect(state.toolCallHistory?.[0]?.argsHash).toBe(hashToolCall("tool", { iteration: 6 })); }); }); describe("detectToolCallLoop", () => { it("is disabled by default", () => { const state = createState(); for (let i = 0; i < 20; i += 1) { recordToolCall(state, "read", { path: "/same.txt" }, `default-${i}`); } const loopResult = detectToolCallLoop(state, "read", { path: "/same.txt" }); expect(loopResult.stuck).toBe(false); }); it("does not flag unique tool calls", () => { const state = createState(); for (let i = 0; i < 15; i += 1) { recordToolCall(state, "read", { path: `/file${i}.txt` }, `call-${i}`); } const result = detectToolCallLoop( state, "read", { path: "/new-file.txt" }, enabledLoopDetectionConfig, ); expect(result.stuck).toBe(false); }); it("warns on generic repeated tool+args calls", () => { const state = createState(); for (let i = 0; i < WARNING_THRESHOLD; i += 1) { recordToolCall(state, "read", { path: "/same.txt" }, `warn-${i}`); } const result = detectToolCallLoop( state, "read", { path: "/same.txt" }, enabledLoopDetectionConfig, ); expect(result.stuck).toBe(true); if (result.stuck) { expect(result.level).toBe("warning"); expect(result.detector).toBe("generic_repeat"); expect(result.count).toBe(WARNING_THRESHOLD); expect(result.message).toContain("WARNING"); expect(result.message).toContain(`${WARNING_THRESHOLD} times`); } }); it("keeps generic loops warn-only below global breaker threshold", () => { const fixture = createReadNoProgressFixture(); const loopResult = detectLoopAfterRepeatedCalls({ toolName: fixture.toolName, toolParams: fixture.params, result: fixture.result, count: CRITICAL_THRESHOLD, }); expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("warning"); } }); it("applies custom thresholds when detection is enabled", () => { const state = createState(); const { params, result } = createNoProgressPollFixture("sess-custom"); const config: ToolLoopDetectionConfig = { enabled: true, warningThreshold: 2, criticalThreshold: 4, detectors: { genericRepeat: false, knownPollNoProgress: true, pingPong: false, }, }; recordRepeatedSuccessfulCalls({ state, toolName: "process", toolParams: params, result, count: 2, }); const warningResult = detectToolCallLoop(state, "process", params, config); expect(warningResult.stuck).toBe(true); if (warningResult.stuck) { expect(warningResult.level).toBe("warning"); } recordRepeatedSuccessfulCalls({ state, toolName: "process", toolParams: params, result, count: 2, startIndex: 2, }); const criticalResult = detectToolCallLoop(state, "process", params, config); expect(criticalResult.stuck).toBe(true); if (criticalResult.stuck) { expect(criticalResult.level).toBe("critical"); expect(criticalResult.detector).toBe("known_poll_no_progress"); } }); it("can disable specific detectors", () => { const state = createState(); const { params, result } = createNoProgressPollFixture("sess-no-detectors"); const config: ToolLoopDetectionConfig = { enabled: true, detectors: { genericRepeat: false, knownPollNoProgress: false, pingPong: false, }, }; recordRepeatedSuccessfulCalls({ state, toolName: "process", toolParams: params, result, count: CRITICAL_THRESHOLD, }); const loopResult = detectToolCallLoop(state, "process", params, config); expect(loopResult.stuck).toBe(false); }); it("warns for known polling no-progress loops", () => { const { params, result } = createNoProgressPollFixture("sess-1"); const loopResult = detectLoopAfterRepeatedCalls({ toolName: "process", toolParams: params, result, count: WARNING_THRESHOLD, }); expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("warning"); expect(loopResult.detector).toBe("known_poll_no_progress"); expect(loopResult.message).toContain("no progress"); } }); it("blocks known polling no-progress loops at critical threshold", () => { const { params, result } = createNoProgressPollFixture("sess-1"); const loopResult = detectLoopAfterRepeatedCalls({ toolName: "process", toolParams: params, result, count: CRITICAL_THRESHOLD, }); expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("critical"); expect(loopResult.detector).toBe("known_poll_no_progress"); expect(loopResult.message).toContain("CRITICAL"); } }); it("does not block known polling when output progresses", () => { const state = createState(); const params = { action: "poll", sessionId: "sess-1" }; for (let i = 0; i < CRITICAL_THRESHOLD + 5; i += 1) { const result = { content: [{ type: "text", text: `line ${i}` }], details: { status: "running", aggregated: `line ${i}` }, }; recordSuccessfulCall(state, "process", params, result, i); } const loopResult = detectToolCallLoop(state, "process", params, enabledLoopDetectionConfig); expect(loopResult.stuck).toBe(false); }); it("blocks any tool with global no-progress breaker at 30", () => { const fixture = createReadNoProgressFixture(); const loopResult = detectLoopAfterRepeatedCalls({ toolName: fixture.toolName, toolParams: fixture.params, result: fixture.result, count: GLOBAL_CIRCUIT_BREAKER_THRESHOLD, }); expect(loopResult.stuck).toBe(true); if (loopResult.stuck) { expect(loopResult.level).toBe("critical"); expect(loopResult.detector).toBe("global_circuit_breaker"); expect(loopResult.message).toContain("global circuit breaker"); } }); it("warns on ping-pong alternating patterns", () => { const state = createState(); const readParams = { path: "/a.txt" }; const listParams = { dir: "/workspace" }; for (let i = 0; i < WARNING_THRESHOLD - 1; i += 1) { if (i % 2 === 0) { recordToolCall(state, "read", readParams, `read-${i}`); } else { recordToolCall(state, "list", listParams, `list-${i}`); } } const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig); expectPingPongLoop(loopResult, { level: "warning", count: WARNING_THRESHOLD }); if (loopResult.stuck) { expect(loopResult.message).toContain("ping-pong loop"); } }); it("blocks ping-pong alternating patterns at critical threshold", () => { const { state, readParams, listParams } = createPingPongFixture(); recordSuccessfulPingPongCalls({ state, readParams, listParams, count: CRITICAL_THRESHOLD - 1, textAtIndex: (toolName) => (toolName === "read" ? "read stable" : "list stable"), }); const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig); expectPingPongLoop(loopResult, { level: "critical", count: CRITICAL_THRESHOLD, expectCriticalText: true, }); if (loopResult.stuck) { expect(loopResult.message).toContain("ping-pong loop"); } }); it("does not block ping-pong at critical threshold when outcomes are progressing", () => { const { state, readParams, listParams } = createPingPongFixture(); recordSuccessfulPingPongCalls({ state, readParams, listParams, count: CRITICAL_THRESHOLD - 1, textAtIndex: (toolName, index) => `${toolName} ${index}`, }); const loopResult = detectToolCallLoop(state, "list", listParams, enabledLoopDetectionConfig); expectPingPongLoop(loopResult, { level: "warning", count: CRITICAL_THRESHOLD }); }); it("does not flag ping-pong when alternation is broken", () => { const state = createState(); recordToolCall(state, "read", { path: "/a.txt" }, "a1"); recordToolCall(state, "list", { dir: "/workspace" }, "b1"); recordToolCall(state, "read", { path: "/a.txt" }, "a2"); recordToolCall(state, "write", { path: "/tmp/out.txt" }, "c1"); // breaks alternation const loopResult = detectToolCallLoop( state, "list", { dir: "/workspace" }, enabledLoopDetectionConfig, ); expect(loopResult.stuck).toBe(false); }); it("records fixed-size result hashes for large tool outputs", () => { const state = createState(); const params = { action: "log", sessionId: "sess-big" }; const toolCallId = "log-big"; recordToolCall(state, "process", params, toolCallId); recordToolCallOutcome(state, { toolName: "process", toolParams: params, toolCallId, result: { content: [{ type: "text", text: "y".repeat(40_000) }], details: { status: "running", totalLines: 1, totalChars: 40_000 }, }, }); const entry = state.toolCallHistory?.find((call) => call.toolCallId === toolCallId); expect(typeof entry?.resultHash).toBe("string"); expect(entry?.resultHash?.length).toBe(64); }); it("handles empty history", () => { const state = createState(); const result = detectToolCallLoop(state, "tool", { arg: 1 }, enabledLoopDetectionConfig); expect(result.stuck).toBe(false); }); }); describe("getToolCallStats", () => { it("returns zero stats for empty history", () => { const state = createState(); const stats = getToolCallStats(state); expect(stats.totalCalls).toBe(0); expect(stats.uniquePatterns).toBe(0); expect(stats.mostFrequent).toBeNull(); }); it("counts total calls and unique patterns", () => { const state = createState(); for (let i = 0; i < 5; i += 1) { recordToolCall(state, "read", { path: "/file.txt" }, `same-${i}`); } recordToolCall(state, "write", { path: "/output.txt" }, "write-1"); recordToolCall(state, "list", { dir: "/home" }, "list-1"); recordToolCall(state, "read", { path: "/other.txt" }, "read-other"); const stats = getToolCallStats(state); expect(stats.totalCalls).toBe(8); expect(stats.uniquePatterns).toBe(4); }); it("identifies most frequent pattern", () => { const state = createState(); for (let i = 0; i < 3; i += 1) { recordToolCall(state, "read", { path: "/file1.txt" }, `p1-${i}`); } for (let i = 0; i < 7; i += 1) { recordToolCall(state, "read", { path: "/file2.txt" }, `p2-${i}`); } for (let i = 0; i < 2; i += 1) { recordToolCall(state, "write", { path: "/output.txt" }, `p3-${i}`); } const stats = getToolCallStats(state); expect(stats.mostFrequent?.toolName).toBe("read"); expect(stats.mostFrequent?.count).toBe(7); }); }); });