fix: report subagent timeout as 'timed out' instead of 'completed successfully' (#13996)

* fix: report subagent timeout as 'timed out' instead of 'completed successfully'

* fix: propagate subagent timeout status across agent.wait (#13996) (thanks @dario-github)

---------

Co-authored-by: Sebastian <19554889+sebslight@users.noreply.github.com>
This commit is contained in:
Dario Zhang
2026-02-12 01:55:30 +08:00
committed by GitHub
parent 2c6569a488
commit e85bbe01f2
5 changed files with 122 additions and 5 deletions

View File

@@ -69,6 +69,7 @@ Docs: https://docs.openclaw.ai
- Gateway: fix multi-agent sessions.usage discovery. (#11523) Thanks @Takhoffman.
- Agents: recover from context overflow caused by oversized tool results (pre-emptive capping + fallback truncation). (#11579) Thanks @tyler6204.
- Subagents/compaction: stabilize announce timing and preserve compaction metrics across retries. (#11664) Thanks @tyler6204.
- Subagents: report timeout-aborted runs as timed out instead of completed successfully in parent-session announcements. (#13996) Thanks @dario-github.
- Cron: share isolated announce flow and harden scheduling/delivery reliability. (#11641) Thanks @tyler6204.
- Cron tool: recover flat params when LLM omits the `job` wrapper for add requests. (#12124) Thanks @tyler6204.
- Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6.

View File

@@ -146,4 +146,77 @@ describe("openclaw-tools: subagents", () => {
// Session should be deleted
expect(deletedKey?.startsWith("agent:main:subagent:")).toBe(true);
});
// Checks the message announced back to the requesting agent when the gateway's
// `agent.wait` reply carries status "timeout": it must say "timed out" and
// must not say "completed successfully".
it("sessions_spawn reports timed out when agent.wait returns timeout", async () => {
  resetSubagentRegistryForTests();
  callGatewayMock.mockReset();

  // Every gateway request is recorded so the announce call can be inspected later.
  const recordedRequests: Array<{ method?: string; params?: unknown }> = [];
  let agentRequestsSeen = 0;

  callGatewayMock.mockImplementation(async (opts: unknown) => {
    const request = opts as { method?: string; params?: unknown };
    recordedRequests.push(request);

    switch (request.method) {
      case "agent": {
        // Each `agent` request is accepted under a fresh, sequential run id.
        agentRequestsSeen += 1;
        return {
          runId: `run-${agentRequestsSeen}`,
          status: "accepted",
          acceptedAt: 5000 + agentRequestsSeen,
        };
      }
      case "agent.wait": {
        // The scenario under test: the wait resolves as a timeout.
        const waitParams = request.params as { runId?: string } | undefined;
        return {
          runId: waitParams?.runId ?? "run-1",
          status: "timeout",
          startedAt: 6000,
          endedAt: 7000,
        };
      }
      case "chat.history": {
        return {
          messages: [
            {
              role: "assistant",
              content: [{ type: "text", text: "still working" }],
            },
          ],
        };
      }
      default:
        return {};
    }
  });

  const availableTools = createOpenClawTools({
    agentSessionKey: "discord:group:req",
    agentChannel: "discord",
  });
  const spawnTool = availableTools.find((candidate) => candidate.name === "sessions_spawn");
  if (!spawnTool) {
    throw new Error("missing sessions_spawn tool");
  }

  const spawnResult = await spawnTool.execute("call-timeout", {
    task: "do thing",
    runTimeoutSeconds: 1,
    cleanup: "keep",
  });
  expect(spawnResult.details).toMatchObject({
    status: "accepted",
    runId: "run-1",
  });

  // Yield three times before inspecting the recorded requests.
  // NOTE(review): presumably this lets the background wait/announce flow run — confirm.
  for (let tick = 0; tick < 3; tick += 1) {
    await sleep(0);
  }

  // The announce is the first `agent` request whose lane is not "subagent".
  const announceCall = recordedRequests.find((call) => {
    if (call.method !== "agent") {
      return false;
    }
    const callParams = call.params as { lane?: string } | undefined;
    return callParams?.lane !== "subagent";
  });
  const announceText =
    (announceCall?.params as { message?: string } | undefined)?.message ?? "";

  expect(announceText).toContain("timed out");
  expect(announceText).not.toContain("completed successfully");
});
});

View File

@@ -214,6 +214,8 @@ function ensureListener() {
if (phase === "error") {
const error = typeof evt.data?.error === "string" ? evt.data.error : undefined;
entry.outcome = { status: "error", error };
} else if (evt.data?.aborted) {
entry.outcome = { status: "timeout" };
} else {
entry.outcome = { status: "ok" };
}
@@ -336,7 +338,7 @@ async function waitForSubagentCompletion(runId: string, waitTimeoutMs: number) {
},
timeoutMs: timeoutMs + 10_000,
});
if (wait?.status !== "ok" && wait?.status !== "error") {
if (wait?.status !== "ok" && wait?.status !== "error" && wait?.status !== "timeout") {
return;
}
const entry = subagentRuns.get(runId);
@@ -358,7 +360,11 @@ async function waitForSubagentCompletion(runId: string, waitTimeoutMs: number) {
}
const waitError = typeof wait.error === "string" ? wait.error : undefined;
entry.outcome =
wait.status === "error" ? { status: "error", error: waitError } : { status: "ok" };
wait.status === "error"
? { status: "error", error: waitError }
: wait.status === "timeout"
? { status: "timeout" }
: { status: "ok" };
mutated = true;
if (mutated) {
persistSubagentRuns();

View File

@@ -0,0 +1,37 @@
import { describe, expect, it } from "vitest";
import { emitAgentEvent } from "../../infra/agent-events.js";
import { waitForAgentJob } from "./agent-job.js";
// Covers how waitForAgentJob turns lifecycle "end" events into a snapshot status.
describe("waitForAgentJob", () => {
  // Builds a run id from the given prefix, the current timestamp and a random
  // base-36 suffix, so each test waits on its own id.
  const makeRunId = (prefix: string): string =>
    `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2)}`;

  it("maps lifecycle end events with aborted=true to timeout", async () => {
    const runId = makeRunId("run-timeout");
    // Start waiting before any event is emitted.
    const pending = waitForAgentJob({ runId, timeoutMs: 1_000 });

    emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "start", startedAt: 100 } });
    emitAgentEvent({
      runId,
      stream: "lifecycle",
      data: { phase: "end", endedAt: 200, aborted: true },
    });

    const result = await pending;
    expect(result).not.toBeNull();
    expect(result?.status).toBe("timeout");
    expect(result?.startedAt).toBe(100);
    expect(result?.endedAt).toBe(200);
  });

  it("keeps non-aborted lifecycle end events as ok", async () => {
    const runId = makeRunId("run-ok");
    const pending = waitForAgentJob({ runId, timeoutMs: 1_000 });

    emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "start", startedAt: 300 } });
    // No `aborted` flag on the end event: the run should count as a normal completion.
    emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "end", endedAt: 400 } });

    const result = await pending;
    expect(result).not.toBeNull();
    expect(result?.status).toBe("ok");
    expect(result?.startedAt).toBe(300);
    expect(result?.endedAt).toBe(400);
  });
});

View File

@@ -7,7 +7,7 @@ let agentRunListenerStarted = false;
type AgentRunSnapshot = {
runId: string;
status: "ok" | "error";
status: "ok" | "error" | "timeout";
startedAt?: number;
endedAt?: number;
error?: string;
@@ -55,7 +55,7 @@ function ensureAgentRunListener() {
agentRunStarts.delete(evt.runId);
recordAgentRunSnapshot({
runId: evt.runId,
status: phase === "error" ? "error" : "ok",
status: phase === "error" ? "error" : evt.data?.aborted ? "timeout" : "ok",
startedAt,
endedAt,
error,
@@ -118,7 +118,7 @@ export async function waitForAgentJob(params: {
const error = typeof evt.data?.error === "string" ? evt.data.error : undefined;
const snapshot: AgentRunSnapshot = {
runId: evt.runId,
status: phase === "error" ? "error" : "ok",
status: phase === "error" ? "error" : evt.data?.aborted ? "timeout" : "ok",
startedAt,
endedAt,
error,