diff --git a/CHANGELOG.md b/CHANGELOG.md index e54aceb11..f09946919 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -224,6 +224,7 @@ Docs: https://docs.openclaw.ai - Agents/Diagnostics: include resolved lifecycle error text in `embedded run agent end` warnings so UI/TUI “Connection error” runs expose actionable provider failure reasons in gateway logs. (#23054) Thanks @Raize. - Agents/Auth profiles: resolve `agentCommand` session scope before choosing `agentDir`/workspace so resumed runs no longer read auth from `agents/main/agent` when the resolved session belongs to a different/default agent (for example `agent:exec:*` sessions). (#24016) Thanks @abersonFAC. - Agents/Auth profiles: skip auth-profile cooldown writes for timeout failures in embedded runner rotation so model/network timeouts do not poison same-provider fallback model selection while still allowing in-turn account rotation. (#22622) Thanks @vageeshkumar. +- Agents/Failover: treat HTTP 502/503/504 errors as failover-eligible transient timeouts so fallback chains can switch providers/models during upstream outages instead of retrying the same failing target. (#20999) Thanks @taw0002 and @vincentkoc. - Plugins/Hooks: run legacy `before_agent_start` once per agent turn and reuse that result across model-resolve and prompt-build compatibility paths, preventing duplicate hook side effects (for example duplicate external API calls). (#23289) Thanks @ksato8710. - Models/Config: default missing Anthropic provider/model `api` fields to `anthropic-messages` during config validation so custom relay model entries are preserved instead of being dropped by runtime model registry validation. (#23332) Thanks @bigbigmonkey123. - Gateway/Pairing: preserve existing approved token scopes when processing repair pairings that omit `scopes`, preventing empty-scope token regressions on reconnecting clients. (#21906) Thanks @paki81. diff --git a/src/agents/failover-error.test.ts b/src/agents/failover-error.test.ts index ab31855cb..d7c1edccb 100644 --- a/src/agents/failover-error.test.ts +++ b/src/agents/failover-error.test.ts @@ -13,7 +13,10 @@ describe("failover-error", () => { expect(resolveFailoverReasonFromError({ status: 403 })).toBe("auth"); expect(resolveFailoverReasonFromError({ status: 408 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 400 })).toBe("format"); + // Transient server errors (502/503/504) should trigger failover as timeout. + expect(resolveFailoverReasonFromError({ status: 502 })).toBe("timeout"); expect(resolveFailoverReasonFromError({ status: 503 })).toBe("timeout"); + expect(resolveFailoverReasonFromError({ status: 504 })).toBe("timeout"); }); it("infers format errors from error messages", () => { diff --git a/src/agents/failover-error.ts b/src/agents/failover-error.ts index 766da7ccf..4de2babde 100644 --- a/src/agents/failover-error.ts +++ b/src/agents/failover-error.ts @@ -163,7 +163,7 @@ export function resolveFailoverReasonFromError(err: unknown): FailoverReason | n if (status === 408) { return "timeout"; } - if (status === 503) { + if (status === 502 || status === 503 || status === 504) { return "timeout"; } if (status === 400) { diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 3eb78cf95..d4b45f843 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -270,12 +270,12 @@ describe("isTransientHttpError", () => { expect(isTransientHttpError("500 Internal Server Error")).toBe(true); expect(isTransientHttpError("502 Bad Gateway")).toBe(true); expect(isTransientHttpError("503 Service Unavailable")).toBe(true); + expect(isTransientHttpError("504 Gateway Timeout")).toBe(true); expect(isTransientHttpError("521 ")).toBe(true); expect(isTransientHttpError("529 Overloaded")).toBe(true); }); it("returns false for non-retryable or non-http text", () => { - expect(isTransientHttpError("504 Gateway Timeout")).toBe(false); expect(isTransientHttpError("429 Too Many Requests")).toBe(false); expect(isTransientHttpError("network timeout")).toBe(false); }); diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index 68ee31f3f..1f4204fe1 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -120,7 +120,7 @@ const HTTP_STATUS_PREFIX_RE = /^(?:http\s*)?(\d{3})\s+(.+)$/i; const HTTP_STATUS_CODE_PREFIX_RE = /^(?:http\s*)?(\d{3})(?:\s+([\s\S]+))?$/i; const HTML_ERROR_PREFIX_RE = /^\s*(?: