fix: report subagent timeout as 'timed out' instead of 'completed successfully' (#13996)
* fix: report subagent timeout as 'timed out' instead of 'completed successfully'
* fix: propagate subagent timeout status across agent.wait (#13996) (thanks @dario-github)

---------

Co-authored-by: Sebastian <19554889+sebslight@users.noreply.github.com>
This commit is contained in:
@@ -69,6 +69,7 @@ Docs: https://docs.openclaw.ai
|
||||
- Gateway: fix multi-agent sessions.usage discovery. (#11523) Thanks @Takhoffman.
|
||||
- Agents: recover from context overflow caused by oversized tool results (pre-emptive capping + fallback truncation). (#11579) Thanks @tyler6204.
|
||||
- Subagents/compaction: stabilize announce timing and preserve compaction metrics across retries. (#11664) Thanks @tyler6204.
|
||||
- Subagents: report timeout-aborted runs as timed out instead of completed successfully in parent-session announcements. (#13996) Thanks @dario-github.
|
||||
- Cron: share isolated announce flow and harden scheduling/delivery reliability. (#11641) Thanks @tyler6204.
|
||||
- Cron tool: recover flat params when LLM omits the `job` wrapper for add requests. (#12124) Thanks @tyler6204.
|
||||
- Gateway/CLI: when `gateway.bind=lan`, use a LAN IP for probe URLs and Control UI links. (#11448) Thanks @AnonO6.
|
||||
|
||||
@@ -146,4 +146,77 @@ describe("openclaw-tools: subagents", () => {
|
||||
// Session should be deleted
|
||||
expect(deletedKey?.startsWith("agent:main:subagent:")).toBe(true);
|
||||
});
|
||||
|
||||
it("sessions_spawn reports timed out when agent.wait returns timeout", async () => {
|
||||
resetSubagentRegistryForTests();
|
||||
callGatewayMock.mockReset();
|
||||
const calls: Array<{ method?: string; params?: unknown }> = [];
|
||||
let agentCallCount = 0;
|
||||
|
||||
callGatewayMock.mockImplementation(async (opts: unknown) => {
|
||||
const request = opts as { method?: string; params?: unknown };
|
||||
calls.push(request);
|
||||
if (request.method === "agent") {
|
||||
agentCallCount += 1;
|
||||
return {
|
||||
runId: `run-${agentCallCount}`,
|
||||
status: "accepted",
|
||||
acceptedAt: 5000 + agentCallCount,
|
||||
};
|
||||
}
|
||||
if (request.method === "agent.wait") {
|
||||
const params = request.params as { runId?: string } | undefined;
|
||||
return {
|
||||
runId: params?.runId ?? "run-1",
|
||||
status: "timeout",
|
||||
startedAt: 6000,
|
||||
endedAt: 7000,
|
||||
};
|
||||
}
|
||||
if (request.method === "chat.history") {
|
||||
return {
|
||||
messages: [
|
||||
{
|
||||
role: "assistant",
|
||||
content: [{ type: "text", text: "still working" }],
|
||||
},
|
||||
],
|
||||
};
|
||||
}
|
||||
return {};
|
||||
});
|
||||
|
||||
const tool = createOpenClawTools({
|
||||
agentSessionKey: "discord:group:req",
|
||||
agentChannel: "discord",
|
||||
}).find((candidate) => candidate.name === "sessions_spawn");
|
||||
if (!tool) {
|
||||
throw new Error("missing sessions_spawn tool");
|
||||
}
|
||||
|
||||
const result = await tool.execute("call-timeout", {
|
||||
task: "do thing",
|
||||
runTimeoutSeconds: 1,
|
||||
cleanup: "keep",
|
||||
});
|
||||
expect(result.details).toMatchObject({
|
||||
status: "accepted",
|
||||
runId: "run-1",
|
||||
});
|
||||
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
await sleep(0);
|
||||
|
||||
const mainAgentCall = calls
|
||||
.filter((call) => call.method === "agent")
|
||||
.find((call) => {
|
||||
const params = call.params as { lane?: string } | undefined;
|
||||
return params?.lane !== "subagent";
|
||||
});
|
||||
const mainMessage = (mainAgentCall?.params as { message?: string } | undefined)?.message ?? "";
|
||||
|
||||
expect(mainMessage).toContain("timed out");
|
||||
expect(mainMessage).not.toContain("completed successfully");
|
||||
});
|
||||
});
|
||||
|
||||
@@ -214,6 +214,8 @@ function ensureListener() {
|
||||
if (phase === "error") {
|
||||
const error = typeof evt.data?.error === "string" ? evt.data.error : undefined;
|
||||
entry.outcome = { status: "error", error };
|
||||
} else if (evt.data?.aborted) {
|
||||
entry.outcome = { status: "timeout" };
|
||||
} else {
|
||||
entry.outcome = { status: "ok" };
|
||||
}
|
||||
@@ -336,7 +338,7 @@ async function waitForSubagentCompletion(runId: string, waitTimeoutMs: number) {
|
||||
},
|
||||
timeoutMs: timeoutMs + 10_000,
|
||||
});
|
||||
- if (wait?.status !== "ok" && wait?.status !== "error") {
|
||||
+ if (wait?.status !== "ok" && wait?.status !== "error" && wait?.status !== "timeout") {
|
||||
return;
|
||||
}
|
||||
const entry = subagentRuns.get(runId);
|
||||
@@ -358,7 +360,11 @@ async function waitForSubagentCompletion(runId: string, waitTimeoutMs: number) {
|
||||
}
|
||||
const waitError = typeof wait.error === "string" ? wait.error : undefined;
|
||||
entry.outcome =
|
||||
- wait.status === "error" ? { status: "error", error: waitError } : { status: "ok" };
|
||||
+ wait.status === "error"
|
||||
+ ? { status: "error", error: waitError }
|
||||
+ : wait.status === "timeout"
|
||||
+ ? { status: "timeout" }
|
||||
+ : { status: "ok" };
|
||||
mutated = true;
|
||||
if (mutated) {
|
||||
persistSubagentRuns();
|
||||
|
||||
37
src/gateway/server-methods/agent-job.test.ts
Normal file
37
src/gateway/server-methods/agent-job.test.ts
Normal file
@@ -0,0 +1,37 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { emitAgentEvent } from "../../infra/agent-events.js";
|
||||
import { waitForAgentJob } from "./agent-job.js";
|
||||
|
||||
describe("waitForAgentJob", () => {
|
||||
it("maps lifecycle end events with aborted=true to timeout", async () => {
|
||||
const runId = `run-timeout-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
||||
const waitPromise = waitForAgentJob({ runId, timeoutMs: 1_000 });
|
||||
|
||||
emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "start", startedAt: 100 } });
|
||||
emitAgentEvent({
|
||||
runId,
|
||||
stream: "lifecycle",
|
||||
data: { phase: "end", endedAt: 200, aborted: true },
|
||||
});
|
||||
|
||||
const snapshot = await waitPromise;
|
||||
expect(snapshot).not.toBeNull();
|
||||
expect(snapshot?.status).toBe("timeout");
|
||||
expect(snapshot?.startedAt).toBe(100);
|
||||
expect(snapshot?.endedAt).toBe(200);
|
||||
});
|
||||
|
||||
it("keeps non-aborted lifecycle end events as ok", async () => {
|
||||
const runId = `run-ok-${Date.now()}-${Math.random().toString(36).slice(2)}`;
|
||||
const waitPromise = waitForAgentJob({ runId, timeoutMs: 1_000 });
|
||||
|
||||
emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "start", startedAt: 300 } });
|
||||
emitAgentEvent({ runId, stream: "lifecycle", data: { phase: "end", endedAt: 400 } });
|
||||
|
||||
const snapshot = await waitPromise;
|
||||
expect(snapshot).not.toBeNull();
|
||||
expect(snapshot?.status).toBe("ok");
|
||||
expect(snapshot?.startedAt).toBe(300);
|
||||
expect(snapshot?.endedAt).toBe(400);
|
||||
});
|
||||
});
|
||||
@@ -7,7 +7,7 @@ let agentRunListenerStarted = false;
|
||||
|
||||
type AgentRunSnapshot = {
|
||||
runId: string;
|
||||
- status: "ok" | "error";
|
||||
+ status: "ok" | "error" | "timeout";
|
||||
startedAt?: number;
|
||||
endedAt?: number;
|
||||
error?: string;
|
||||
@@ -55,7 +55,7 @@ function ensureAgentRunListener() {
|
||||
agentRunStarts.delete(evt.runId);
|
||||
recordAgentRunSnapshot({
|
||||
runId: evt.runId,
|
||||
- status: phase === "error" ? "error" : "ok",
|
||||
+ status: phase === "error" ? "error" : evt.data?.aborted ? "timeout" : "ok",
|
||||
startedAt,
|
||||
endedAt,
|
||||
error,
|
||||
@@ -118,7 +118,7 @@ export async function waitForAgentJob(params: {
|
||||
const error = typeof evt.data?.error === "string" ? evt.data.error : undefined;
|
||||
const snapshot: AgentRunSnapshot = {
|
||||
runId: evt.runId,
|
||||
- status: phase === "error" ? "error" : "ok",
|
||||
+ status: phase === "error" ? "error" : evt.data?.aborted ? "timeout" : "ok",
|
||||
startedAt,
|
||||
endedAt,
|
||||
error,
|
||||
|
||||
Reference in New Issue
Block a user