import type { OpenClawConfig } from "../config/config.js";
import {
  resolveAgentModelFallbackValues,
  resolveAgentModelPrimaryValue,
} from "../config/model-input.js";
import {
  ensureAuthProfileStore,
  getSoonestCooldownExpiry,
  isProfileInCooldown,
  resolveProfilesUnavailableReason,
  resolveAuthProfileOrder,
} from "./auth-profiles.js";
import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "./defaults.js";
import {
  coerceToFailoverError,
  describeFailoverError,
  isFailoverError,
  isTimeoutError,
} from "./failover-error.js";
import {
  buildConfiguredAllowlistKeys,
  buildModelAliasIndex,
  modelKey,
  normalizeModelRef,
  resolveConfiguredModelRef,
  resolveModelRefFromString,
} from "./model-selection.js";
import type { FailoverReason } from "./pi-embedded-helpers.js";
import { isLikelyContextOverflowError } from "./pi-embedded-helpers.js";

/** A provider/model pair that the fallback loop may attempt. */
type ModelCandidate = {
  provider: string;
  model: string;
};

/** Record of one failed (or skipped) candidate, used for the final failure summary. */
type FallbackAttempt = {
  provider: string;
  model: string;
  error: string;
  reason?: FailoverReason;
  status?: number;
  code?: string;
};

/**
 * Fallback abort check. Only treats explicit AbortError names as user aborts.
 * Message-based checks (e.g., "aborted") can mask timeouts and skip fallback.
 */
function isFallbackAbortError(err: unknown): boolean {
  if (!err || typeof err !== "object") {
    return false;
  }
  // Errors already classified as failover errors are never treated as aborts.
  if (isFailoverError(err)) {
    return false;
  }
  const name = "name" in err ? String(err.name) : "";
  return name === "AbortError";
}

/** True when `err` is an AbortError that is not also classified as a timeout. */
function shouldRethrowAbort(err: unknown): boolean {
  return isFallbackAbortError(err) && !isTimeoutError(err);
}

/**
 * Creates an ordered, de-duplicated candidate list plus two ways of adding to it.
 *
 * - `addExplicitCandidate` ignores the allowlist.
 * - `addAllowlistedCandidate` drops candidates whose key is missing from a
 *   non-empty-valued (`truthy`) allowlist.
 *
 * Candidates with an empty provider or model, and candidates whose
 * `modelKey` was already added, are silently ignored.
 */
function createModelCandidateCollector(allowlist: Set<string> | null | undefined): {
  candidates: ModelCandidate[];
  addExplicitCandidate: (candidate: ModelCandidate) => void;
  addAllowlistedCandidate: (candidate: ModelCandidate) => void;
} {
  const seen = new Set<string>();
  const candidates: ModelCandidate[] = [];

  const addCandidate = (candidate: ModelCandidate, enforceAllowlist: boolean) => {
    if (!candidate.provider || !candidate.model) {
      return;
    }
    const key = modelKey(candidate.provider, candidate.model);
    if (seen.has(key)) {
      return;
    }
    if (enforceAllowlist && allowlist && !allowlist.has(key)) {
      return;
    }
    seen.add(key);
    candidates.push(candidate);
  };

  const addExplicitCandidate = (candidate: ModelCandidate) => {
    addCandidate(candidate, false);
  };
  const addAllowlistedCandidate = (candidate: ModelCandidate) => {
    addCandidate(candidate, true);
  };

  return { candidates, addExplicitCandidate, addAllowlistedCandidate };
}

/** Callback invoked after each failed attempt; `attempt` is 1-based. */
type ModelFallbackErrorHandler = (attempt: {
  provider: string;
  model: string;
  error: unknown;
  attempt: number;
  total: number;
}) => void | Promise<void>;

/** Successful outcome of a fallback run, including the attempts that failed before it. */
type ModelFallbackRunResult<T> = {
  result: T;
  provider: string;
  model: string;
  attempts: FallbackAttempt[];
};

/** Strict equality on both provider and model. */
function sameModelCandidate(a: ModelCandidate, b: ModelCandidate): boolean {
  return a.provider === b.provider && a.model === b.model;
}

/**
 * Always throws.
 *
 * With at most one recorded attempt and a truthy `lastError`, that error is
 * rethrown unchanged. Otherwise an aggregate `Error` is thrown whose message
 * lists every attempt and whose `cause` is `lastError` when it is an `Error`.
 */
function throwFallbackFailureSummary(params: {
  attempts: FallbackAttempt[];
  candidates: ModelCandidate[];
  lastError: unknown;
  label: string;
  formatAttempt: (attempt: FallbackAttempt) => string;
}): never {
  if (params.attempts.length <= 1 && params.lastError) {
    throw params.lastError;
  }
  const summary =
    params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
  throw new Error(
    `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
    {
      cause: params.lastError instanceof Error ? params.lastError : undefined,
    },
  );
}

/**
 * Builds the ordered candidate list for image-model runs.
 *
 * The first entry is `modelOverride` when it is non-blank, otherwise the
 * configured `agents.defaults.imageModel` primary (when non-blank). The
 * configured image fallbacks follow in order.
 */
function resolveImageFallbackCandidates(params: {
  cfg: OpenClawConfig | undefined;
  defaultProvider: string;
  modelOverride?: string;
}): ModelCandidate[] {
  const aliasIndex = buildModelAliasIndex({
    cfg: params.cfg ?? {},
    defaultProvider: params.defaultProvider,
  });
  const allowlist = buildConfiguredAllowlistKeys({
    cfg: params.cfg,
    defaultProvider: params.defaultProvider,
  });
  const { candidates, addExplicitCandidate, addAllowlistedCandidate } =
    createModelCandidateCollector(allowlist);

  // Resolves a raw model string and adds it; unresolvable strings are ignored.
  const addRaw = (raw: string, opts?: { allowlist?: boolean }) => {
    const resolved = resolveModelRefFromString({
      raw: String(raw ?? ""),
      defaultProvider: params.defaultProvider,
      aliasIndex,
    });
    if (!resolved) {
      return;
    }
    if (opts?.allowlist) {
      addAllowlistedCandidate(resolved.ref);
      return;
    }
    addExplicitCandidate(resolved.ref);
  };

  if (params.modelOverride?.trim()) {
    addRaw(params.modelOverride);
  } else {
    const primary = resolveAgentModelPrimaryValue(params.cfg?.agents?.defaults?.imageModel);
    if (primary?.trim()) {
      addRaw(primary);
    }
  }

  const imageFallbacks = resolveAgentModelFallbackValues(params.cfg?.agents?.defaults?.imageModel);

  for (const raw of imageFallbacks) {
    // Explicitly configured image fallbacks should remain reachable even when a
    // model allowlist is present.
    addRaw(raw);
  }

  return candidates;
}

/**
 * Builds the ordered candidate list for text-model runs.
 *
 * Order: the requested provider/model (normalized, defaulting to the
 * configured primary), then the fallback chain, then — only when no
 * `fallbacksOverride` was supplied — the configured primary itself.
 * Duplicates are dropped by the collector.
 */
function resolveFallbackCandidates(params: {
  cfg: OpenClawConfig | undefined;
  provider: string;
  model: string;
  /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
  fallbacksOverride?: string[];
}): ModelCandidate[] {
  const primary = params.cfg
    ? resolveConfiguredModelRef({
        cfg: params.cfg,
        defaultProvider: DEFAULT_PROVIDER,
        defaultModel: DEFAULT_MODEL,
      })
    : null;
  const defaultProvider = primary?.provider ?? DEFAULT_PROVIDER;
  const defaultModel = primary?.model ?? DEFAULT_MODEL;
  const providerRaw = String(params.provider ?? "").trim() || defaultProvider;
  const modelRaw = String(params.model ?? "").trim() || defaultModel;
  const normalizedPrimary = normalizeModelRef(providerRaw, modelRaw);
  const configuredPrimary = normalizeModelRef(defaultProvider, defaultModel);
  const aliasIndex = buildModelAliasIndex({
    cfg: params.cfg ?? {},
    defaultProvider,
  });
  const allowlist = buildConfiguredAllowlistKeys({
    cfg: params.cfg,
    defaultProvider,
  });
  const { candidates, addExplicitCandidate } = createModelCandidateCollector(allowlist);

  addExplicitCandidate(normalizedPrimary);

  const modelFallbacks = (() => {
    if (params.fallbacksOverride !== undefined) {
      return params.fallbacksOverride;
    }
    const configuredFallbacks = resolveAgentModelFallbackValues(
      params.cfg?.agents?.defaults?.model,
    );
    // When user runs a different provider than config, only use configured fallbacks
    // if the current model is already in that chain (e.g. session on first fallback).
    if (normalizedPrimary.provider !== configuredPrimary.provider) {
      const isConfiguredFallback = configuredFallbacks.some((raw) => {
        const resolved = resolveModelRefFromString({
          raw: String(raw ?? ""),
          defaultProvider,
          aliasIndex,
        });
        return resolved ? sameModelCandidate(resolved.ref, normalizedPrimary) : false;
      });
      return isConfiguredFallback ? configuredFallbacks : [];
    }
    // Same provider: always use full fallback chain (model version differences within provider).
    return configuredFallbacks;
  })();

  for (const raw of modelFallbacks) {
    const resolved = resolveModelRefFromString({
      raw: String(raw ?? ""),
      defaultProvider,
      aliasIndex,
    });
    if (!resolved) {
      continue;
    }
    // Fallbacks are explicit user intent; do not silently filter them by the
    // model allowlist.
    addExplicitCandidate(resolved.ref);
  }

  if (params.fallbacksOverride === undefined && primary?.provider && primary.model) {
    addExplicitCandidate({ provider: primary.provider, model: primary.model });
  }

  return candidates;
}

/** Timestamp (ms) of the most recent cooldown probe, keyed by probe throttle key. */
const lastProbeAttempt = new Map<string, number>();
const MIN_PROBE_INTERVAL_MS = 30_000; // 30 seconds between probes per key
const PROBE_MARGIN_MS = 2 * 60 * 1000;
const PROBE_SCOPE_DELIMITER = "::";

/**
 * Builds the key used in `lastProbeAttempt`: the provider alone, or
 * `<agentDir>::<provider>` when a non-blank `agentDir` is given.
 */
function resolveProbeThrottleKey(provider: string, agentDir?: string): string {
  const scope = String(agentDir ?? "").trim();
  return scope ? `${scope}${PROBE_SCOPE_DELIMITER}${provider}` : provider;
}

/**
 * Decides whether the primary candidate may be probed while all of its
 * profiles are in cooldown.
 *
 * Returns false for non-primary candidates, when there is nothing to fall
 * back to, or when the last probe for `throttleKey` was less than
 * `MIN_PROBE_INTERVAL_MS` ago.
 */
function shouldProbePrimaryDuringCooldown(params: {
  isPrimary: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  throttleKey: string;
  authStore: ReturnType<typeof ensureAuthProfileStore>;
  profileIds: string[];
}): boolean {
  if (!params.isPrimary || !params.hasFallbackCandidates) {
    return false;
  }

  const lastProbe = lastProbeAttempt.get(params.throttleKey) ?? 0;
  if (params.now - lastProbe < MIN_PROBE_INTERVAL_MS) {
    return false;
  }

  const soonest = getSoonestCooldownExpiry(params.authStore, params.profileIds);
  if (soonest === null || !Number.isFinite(soonest)) {
    return true;
  }

  // Probe when cooldown already expired or within the configured margin.
  return params.now >= soonest - PROBE_MARGIN_MS;
}

/** @internal – exposed for unit tests only */
export const _probeThrottleInternals = {
  lastProbeAttempt,
  MIN_PROBE_INTERVAL_MS,
  PROBE_MARGIN_MS,
  resolveProbeThrottleKey,
} as const;

/** Outcome of evaluating a candidate whose profiles are all in cooldown. */
type CooldownDecision =
  | {
      type: "skip";
      reason: FailoverReason;
      error: string;
    }
  | {
      type: "attempt";
      reason: FailoverReason;
      markProbe: boolean;
    };

/**
 * Decides whether a candidate whose provider profiles are all in cooldown
 * should be skipped or attempted anyway.
 *
 * The unavailability reason defaults to `"rate_limit"` when none can be
 * resolved. `auth`, `auth_permanent` and `billing` always produce a skip.
 */
function resolveCooldownDecision(params: {
  candidate: ModelCandidate;
  isPrimary: boolean;
  requestedModel: boolean;
  hasFallbackCandidates: boolean;
  now: number;
  probeThrottleKey: string;
  authStore: ReturnType<typeof ensureAuthProfileStore>;
  profileIds: string[];
}): CooldownDecision {
  const shouldProbe = shouldProbePrimaryDuringCooldown({
    isPrimary: params.isPrimary,
    hasFallbackCandidates: params.hasFallbackCandidates,
    now: params.now,
    throttleKey: params.probeThrottleKey,
    authStore: params.authStore,
    profileIds: params.profileIds,
  });

  const inferredReason =
    resolveProfilesUnavailableReason({
      store: params.authStore,
      profileIds: params.profileIds,
      now: params.now,
    }) ?? "rate_limit";
  const isPersistentIssue =
    inferredReason === "auth" || inferredReason === "auth_permanent" || inferredReason === "billing";
  if (isPersistentIssue) {
    return {
      type: "skip",
      reason: inferredReason,
      error: `Provider ${params.candidate.provider} has ${inferredReason} issue (skipping all models)`,
    };
  }

  // For primary: try when requested model or when probe allows.
  // For same-provider fallbacks: only relax cooldown on rate_limit, which
  // is commonly model-scoped and can recover on a sibling model.
  const shouldAttemptDespiteCooldown =
    (params.isPrimary && (!params.requestedModel || shouldProbe)) ||
    (!params.isPrimary && inferredReason === "rate_limit");
  if (!shouldAttemptDespiteCooldown) {
    return {
      type: "skip",
      reason: inferredReason,
      error: `Provider ${params.candidate.provider} is in cooldown (all profiles unavailable)`,
    };
  }

  return {
    type: "attempt",
    reason: inferredReason,
    markProbe: params.isPrimary && shouldProbe,
  };
}

/**
 * Runs `params.run` against the requested model and then each fallback
 * candidate in order, returning the first successful result together with
 * the attempts that failed before it.
 *
 * When `cfg` is provided, candidates whose auth profiles are all in cooldown
 * are skipped or probed according to `resolveCooldownDecision`.
 *
 * @throws The original error immediately for non-timeout AbortErrors and for
 *   likely context-overflow errors; the original error when the last
 *   candidate fails with an unrecognized error; otherwise whatever
 *   `throwFallbackFailureSummary` throws once every candidate has failed.
 */
export async function runWithModelFallback<T>(params: {
  cfg: OpenClawConfig | undefined;
  provider: string;
  model: string;
  agentDir?: string;
  /** Optional explicit fallbacks list; when provided (even empty), replaces agents.defaults.model.fallbacks. */
  fallbacksOverride?: string[];
  run: (provider: string, model: string) => Promise<T>;
  onError?: ModelFallbackErrorHandler;
}): Promise<ModelFallbackRunResult<T>> {
  const candidates = resolveFallbackCandidates({
    cfg: params.cfg,
    provider: params.provider,
    model: params.model,
    fallbacksOverride: params.fallbacksOverride,
  });
  const authStore = params.cfg
    ? ensureAuthProfileStore(params.agentDir, { allowKeychainPrompt: false })
    : null;
  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  const hasFallbackCandidates = candidates.length > 1;

  for (let i = 0; i < candidates.length; i += 1) {
    const candidate = candidates[i];
    if (authStore) {
      const profileIds = resolveAuthProfileOrder({
        cfg: params.cfg,
        store: authStore,
        provider: candidate.provider,
      });
      const isAnyProfileAvailable = profileIds.some((id) => !isProfileInCooldown(authStore, id));

      if (profileIds.length > 0 && !isAnyProfileAvailable) {
        // All profiles for this provider are in cooldown.
        const isPrimary = i === 0;
        const requestedModel =
          params.provider === candidate.provider && params.model === candidate.model;
        const now = Date.now();
        const probeThrottleKey = resolveProbeThrottleKey(candidate.provider, params.agentDir);
        const decision = resolveCooldownDecision({
          candidate,
          isPrimary,
          requestedModel,
          hasFallbackCandidates,
          now,
          probeThrottleKey,
          authStore,
          profileIds,
        });

        if (decision.type === "skip") {
          attempts.push({
            provider: candidate.provider,
            model: candidate.model,
            error: decision.error,
            reason: decision.reason,
          });
          continue;
        }

        if (decision.markProbe) {
          lastProbeAttempt.set(probeThrottleKey, now);
        }
      }
    }
    try {
      const result = await params.run(candidate.provider, candidate.model);
      return {
        result,
        provider: candidate.provider,
        model: candidate.model,
        attempts,
      };
    } catch (err) {
      if (shouldRethrowAbort(err)) {
        throw err;
      }
      // Context overflow errors should be handled by the inner runner's
      // compaction/retry logic, not by model fallback. If one escapes as a
      // throw, rethrow it immediately rather than trying a different model
      // that may have a smaller context window and fail worse.
      const errMessage = err instanceof Error ? err.message : String(err);
      if (isLikelyContextOverflowError(errMessage)) {
        throw err;
      }
      const normalized =
        coerceToFailoverError(err, {
          provider: candidate.provider,
          model: candidate.model,
        }) ?? err;

      // Even unrecognized errors should not abort the fallback loop when
      // there are remaining candidates. Only abort/context-overflow errors
      // (handled above) are truly non-retryable.
      const isKnownFailover = isFailoverError(normalized);
      if (!isKnownFailover && i === candidates.length - 1) {
        throw err;
      }

      lastError = isKnownFailover ? normalized : err;
      const described = describeFailoverError(normalized);
      attempts.push({
        provider: candidate.provider,
        model: candidate.model,
        error: described.message,
        reason: described.reason ?? "unknown",
        status: described.status,
        code: described.code,
      });
      await params.onError?.({
        provider: candidate.provider,
        model: candidate.model,
        error: isKnownFailover ? normalized : err,
        attempt: i + 1,
        total: candidates.length,
      });
    }
  }

  throwFallbackFailureSummary({
    attempts,
    candidates,
    lastError,
    label: "models",
    formatAttempt: (attempt) =>
      `${attempt.provider}/${attempt.model}: ${attempt.error}${
        attempt.reason ? ` (${attempt.reason})` : ""
      }`,
  });
}

/**
 * Runs `params.run` against each image-model candidate in order and returns
 * the first successful result together with the attempts that failed before it.
 *
 * @throws An `Error` when no image model candidate can be resolved; the
 *   original error immediately for non-timeout AbortErrors; otherwise
 *   whatever `throwFallbackFailureSummary` throws once every candidate failed.
 */
export async function runWithImageModelFallback<T>(params: {
  cfg: OpenClawConfig | undefined;
  modelOverride?: string;
  run: (provider: string, model: string) => Promise<T>;
  onError?: ModelFallbackErrorHandler;
}): Promise<ModelFallbackRunResult<T>> {
  const candidates = resolveImageFallbackCandidates({
    cfg: params.cfg,
    defaultProvider: DEFAULT_PROVIDER,
    modelOverride: params.modelOverride,
  });
  if (candidates.length === 0) {
    throw new Error(
      "No image model configured. Set agents.defaults.imageModel.primary or agents.defaults.imageModel.fallbacks.",
    );
  }

  const attempts: FallbackAttempt[] = [];
  let lastError: unknown;

  for (let i = 0; i < candidates.length; i += 1) {
    const candidate = candidates[i];
    try {
      const result = await params.run(candidate.provider, candidate.model);
      return {
        result,
        provider: candidate.provider,
        model: candidate.model,
        attempts,
      };
    } catch (err) {
      if (shouldRethrowAbort(err)) {
        throw err;
      }
      lastError = err;
      attempts.push({
        provider: candidate.provider,
        model: candidate.model,
        error: err instanceof Error ? err.message : String(err),
      });
      await params.onError?.({
        provider: candidate.provider,
        model: candidate.model,
        error: err,
        attempt: i + 1,
        total: candidates.length,
      });
    }
  }

  throwFallbackFailureSummary({
    attempts,
    candidates,
    lastError,
    label: "image models",
    formatAttempt: (attempt) => `${attempt.provider}/${attempt.model}: ${attempt.error}`,
  });
}