From f16ecd1dac5d052d4d6e3f841b8c21ad12da5228 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 27 Feb 2026 17:20:47 -0800 Subject: [PATCH] fix(ollama): unify context window handling across discovery, merge, and OpenAI-compat transport (#29205) * fix(ollama): inject num_ctx for OpenAI-compatible transport * fix(ollama): discover per-model context and preserve higher limits * fix(agents): prefer matching provider model for fallback limits * fix(types): require numeric token limits in provider model merge * fix(types): accept unknown payload in ollama num_ctx wrapper * fix(types): simplify ollama settled-result extraction * config(models): add provider flag for Ollama OpenAI num_ctx injection * config(schema): allow provider num_ctx injection flag * config(labels): label provider num_ctx injection flag * config(help): document provider num_ctx injection flag * agents(ollama): gate OpenAI num_ctx injection with provider config * tests(ollama): cover provider num_ctx injection flag behavior * docs(config): list provider num_ctx injection option * docs(ollama): document OpenAI num_ctx injection toggle * docs(config): clarify merge token-limit precedence * config(help): note merge uses higher model token limits * fix(ollama): cap /api/show discovery concurrency * fix(ollama): restrict num_ctx injection to OpenAI compat * tests(ollama): cover ipv6 and compat num_ctx gating * fix(ollama): detect remote compat endpoints for ollama-labeled providers * fix(ollama): cap per-model /api/show lookups to bound discovery load --- docs/gateway/configuration-reference.md | 2 + docs/providers/ollama.md | 19 +++ ...ssing-provider-apikey-from-env-var.test.ts | 53 ++++++ .../models-config.providers.ollama.test.ts | 111 +++++++++++- src/agents/models-config.providers.ts | 77 +++++++-- src/agents/models-config.ts | 13 +- src/agents/pi-embedded-runner/model.test.ts | 29 ++++ src/agents/pi-embedded-runner/model.ts | 11 +- .../pi-embedded-runner/run/attempt.test.ts | 160 ++++++++++++++++++ src/agents/pi-embedded-runner/run/attempt.ts | 121 ++++++++++++- src/config/schema.help.ts | 4 +- src/config/schema.labels.ts | 1 + src/config/types.models.ts | 1 + src/config/zod-schema.core.ts | 1 + 14 files changed, 582 insertions(+), 21 deletions(-) diff --git a/docs/gateway/configuration-reference.md b/docs/gateway/configuration-reference.md index c816705e8..cc0c0cb52 100644 --- a/docs/gateway/configuration-reference.md +++ b/docs/gateway/configuration-reference.md @@ -1863,6 +1863,7 @@ OpenClaw uses the pi-coding-agent model catalog. Add custom providers via `model - Merge precedence for matching provider IDs: - Non-empty agent `models.json` `apiKey`/`baseUrl` win. - Empty or missing agent `apiKey`/`baseUrl` fall back to `models.providers` in config. + - Matching model `contextWindow`/`maxTokens` use the higher value between explicit config and implicit catalog values. - Use `models.mode: "replace"` when you want config to fully rewrite `models.json`. ### Provider field details @@ -1872,6 +1873,7 @@ OpenClaw uses the pi-coding-agent model catalog. Add custom providers via `model - `models.providers.*.api`: request adapter (`openai-completions`, `openai-responses`, `anthropic-messages`, `google-generative-ai`, etc). - `models.providers.*.apiKey`: provider credential (prefer SecretRef/env substitution). - `models.providers.*.auth`: auth strategy (`api-key`, `token`, `oauth`, `aws-sdk`). +- `models.providers.*.injectNumCtxForOpenAICompat`: for Ollama + `openai-completions`, inject `options.num_ctx` into requests (default: `true`). - `models.providers.*.authHeader`: force credential transport in the `Authorization` header when required. - `models.providers.*.baseUrl`: upstream API base URL. - `models.providers.*.headers`: extra static headers for proxy/tenant routing. diff --git a/docs/providers/ollama.md b/docs/providers/ollama.md index 98b39954d..b82f6411b 100644 --- a/docs/providers/ollama.md +++ b/docs/providers/ollama.md @@ -199,6 +199,7 @@ If you need to use the OpenAI-compatible endpoint instead (e.g., behind a proxy ollama: { baseUrl: "http://ollama-host:11434/v1", api: "openai-completions", + injectNumCtxForOpenAICompat: true, // default: true apiKey: "ollama-local", models: [...] } @@ -209,6 +210,24 @@ If you need to use the OpenAI-compatible endpoint instead (e.g., behind a proxy This mode may not support streaming + tool calling simultaneously. You may need to disable streaming with `params: { streaming: false }` in model config. +When `api: "openai-completions"` is used with Ollama, OpenClaw injects `options.num_ctx` by default so Ollama does not silently fall back to a 4096 context window. If your proxy/upstream rejects unknown `options` fields, disable this behavior: + +```json5 +{ + models: { + providers: { + ollama: { + baseUrl: "http://ollama-host:11434/v1", + api: "openai-completions", + injectNumCtxForOpenAICompat: false, + apiKey: "ollama-local", + models: [...] + } + } + } +} +``` + ### Context windows For auto-discovered models, OpenClaw uses the context window reported by Ollama when available, otherwise it defaults to `8192`. You can override `contextWindow` and `maxTokens` in explicit provider config. diff --git a/src/agents/models-config.fills-missing-provider-apikey-from-env-var.test.ts b/src/agents/models-config.fills-missing-provider-apikey-from-env-var.test.ts index 4abfa4f1a..e7ddd2f58 100644 --- a/src/agents/models-config.fills-missing-provider-apikey-from-env-var.test.ts +++ b/src/agents/models-config.fills-missing-provider-apikey-from-env-var.test.ts @@ -307,4 +307,57 @@ describe("models-config", () => { } }); }); + + it("preserves explicit larger token limits when they exceed implicit catalog defaults", async () => { + await withTempHome(async () => { + const prevKey = process.env.MOONSHOT_API_KEY; + process.env.MOONSHOT_API_KEY = "sk-moonshot-test"; + try { + const cfg: OpenClawConfig = { + models: { + providers: { + moonshot: { + baseUrl: "https://api.moonshot.ai/v1", + api: "openai-completions", + models: [ + { + id: "kimi-k2.5", + name: "Kimi K2.5", + reasoning: false, + input: ["text"], + cost: { input: 123, output: 456, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 350000, + maxTokens: 16384, + }, + ], + }, + }, + }, + }; + + await ensureOpenClawModelsJson(cfg); + const parsed = await readGeneratedModelsJson<{ + providers: Record< + string, + { + models?: Array<{ + id: string; + contextWindow?: number; + maxTokens?: number; + }>; + } + >; + }>(); + const kimi = parsed.providers.moonshot?.models?.find((model) => model.id === "kimi-k2.5"); + expect(kimi?.contextWindow).toBe(350000); + expect(kimi?.maxTokens).toBe(16384); + } finally { + if (prevKey === undefined) { + delete process.env.MOONSHOT_API_KEY; + } else { + process.env.MOONSHOT_API_KEY = prevKey; + } + } + }); + }); }); diff --git a/src/agents/models-config.providers.ollama.test.ts b/src/agents/models-config.providers.ollama.test.ts index 263ef5574..d007e5f8d 100644 --- a/src/agents/models-config.providers.ollama.test.ts +++ b/src/agents/models-config.providers.ollama.test.ts @@ -1,9 +1,14 @@ import { mkdtempSync } from "node:fs"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import { describe, expect, it } from "vitest"; +import { afterEach, describe, expect, it, vi } from "vitest"; import { resolveImplicitProviders, resolveOllamaApiBase } from "./models-config.providers.js"; +afterEach(() => { + vi.unstubAllEnvs(); + vi.unstubAllGlobals(); +}); + describe("resolveOllamaApiBase", () => { it("returns default localhost base when no configured URL is provided", () => { expect(resolveOllamaApiBase()).toBe("http://127.0.0.1:11434"); @@ -71,6 +76,110 @@ describe("Ollama provider", () => { } }); + it("discovers per-model context windows from /api/show", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + process.env.OLLAMA_API_KEY = "test-key"; + vi.stubEnv("VITEST", ""); + vi.stubEnv("NODE_ENV", "development"); + const fetchMock = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + models: [ + { name: "qwen3:32b", modified_at: "", size: 1, digest: "" }, + { name: "llama3.3:70b", modified_at: "", size: 1, digest: "" }, + ], + }), + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ model_info: { "qwen3.context_length": 131072 } }), + }) + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ model_info: { "llama.context_length": 65536 } }), + }); + vi.stubGlobal("fetch", fetchMock); + + try { + const providers = await resolveImplicitProviders({ agentDir }); + const models = providers?.ollama?.models ?? []; + const qwen = models.find((model) => model.id === "qwen3:32b"); + const llama = models.find((model) => model.id === "llama3.3:70b"); + expect(qwen?.contextWindow).toBe(131072); + expect(llama?.contextWindow).toBe(65536); + expect(fetchMock).toHaveBeenCalledTimes(3); + } finally { + delete process.env.OLLAMA_API_KEY; + } + }); + + it("falls back to default context window when /api/show fails", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + process.env.OLLAMA_API_KEY = "test-key"; + vi.stubEnv("VITEST", ""); + vi.stubEnv("NODE_ENV", "development"); + const fetchMock = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + json: async () => ({ + models: [{ name: "qwen3:32b", modified_at: "", size: 1, digest: "" }], + }), + }) + .mockResolvedValueOnce({ + ok: false, + status: 500, + }); + vi.stubGlobal("fetch", fetchMock); + + try { + const providers = await resolveImplicitProviders({ agentDir }); + const model = providers?.ollama?.models?.find((entry) => entry.id === "qwen3:32b"); + expect(model?.contextWindow).toBe(128000); + expect(fetchMock).toHaveBeenCalledTimes(2); + } finally { + delete process.env.OLLAMA_API_KEY; + } + }); + + it("caps /api/show requests when /api/tags returns a very large model list", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + process.env.OLLAMA_API_KEY = "test-key"; + vi.stubEnv("VITEST", ""); + vi.stubEnv("NODE_ENV", "development"); + const manyModels = Array.from({ length: 250 }, (_, idx) => ({ + name: `model-${idx}`, + modified_at: "", + size: 1, + digest: "", + })); + const fetchMock = vi.fn(async (url: string) => { + if (url.endsWith("/api/tags")) { + return { + ok: true, + json: async () => ({ models: manyModels }), + }; + } + return { + ok: true, + json: async () => ({ model_info: { "llama.context_length": 65536 } }), + }; + }); + vi.stubGlobal("fetch", fetchMock); + + try { + const providers = await resolveImplicitProviders({ agentDir }); + const models = providers?.ollama?.models ?? []; + // 1 call for /api/tags + 200 capped /api/show calls. + expect(fetchMock).toHaveBeenCalledTimes(201); + expect(models).toHaveLength(200); + } finally { + delete process.env.OLLAMA_API_KEY; + } + }); + it("should have correct model structure without streaming override", () => { const mockOllamaModel = { id: "llama3.3:latest", diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts index 64b8d538f..012052d3f 100644 --- a/src/agents/models-config.providers.ts +++ b/src/agents/models-config.providers.ts @@ -144,6 +144,8 @@ const QWEN_PORTAL_DEFAULT_COST = { const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL; const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL; +const OLLAMA_SHOW_CONCURRENCY = 8; +const OLLAMA_SHOW_MAX_MODELS = 200; const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000; const OLLAMA_DEFAULT_MAX_TOKENS = 8192; const OLLAMA_DEFAULT_COST = { @@ -236,6 +238,38 @@ export function resolveOllamaApiBase(configuredBaseUrl?: string): string { return trimmed.replace(/\/v1$/i, ""); } +async function queryOllamaContextWindow( + apiBase: string, + modelName: string, +): Promise { + try { + const response = await fetch(`${apiBase}/api/show`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ name: modelName }), + signal: AbortSignal.timeout(3000), + }); + if (!response.ok) { + return undefined; + } + const data = (await response.json()) as { model_info?: Record }; + if (!data.model_info) { + return undefined; + } + for (const [key, value] of Object.entries(data.model_info)) { + if (key.endsWith(".context_length") && typeof value === "number" && Number.isFinite(value)) { + const contextWindow = Math.floor(value); + if (contextWindow > 0) { + return contextWindow; + } + } + } + return undefined; + } catch { + return undefined; + } +} + async function discoverOllamaModels( baseUrl?: string, opts?: { quiet?: boolean }, @@ -260,20 +294,35 @@ async function discoverOllamaModels( log.debug("No Ollama models found on local instance"); return []; } - return data.models.map((model) => { - const modelId = model.name; - const isReasoning = - modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning"); - return { - id: modelId, - name: modelId, - reasoning: isReasoning, - input: ["text"], - cost: OLLAMA_DEFAULT_COST, - contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW, - maxTokens: OLLAMA_DEFAULT_MAX_TOKENS, - }; - }); + const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS); + if (modelsToInspect.length < data.models.length && !opts?.quiet) { + log.warn( + `Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`, + ); + } + const discovered: ModelDefinitionConfig[] = []; + for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) { + const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY); + const batchDiscovered = await Promise.all( + batch.map(async (model) => { + const modelId = model.name; + const contextWindow = await queryOllamaContextWindow(apiBase, modelId); + const isReasoning = + modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning"); + return { + id: modelId, + name: modelId, + reasoning: isReasoning, + input: ["text"], + cost: OLLAMA_DEFAULT_COST, + contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW, + maxTokens: OLLAMA_DEFAULT_MAX_TOKENS, + } satisfies ModelDefinitionConfig; + }), + ); + discovered.push(...batchDiscovered); + } + return discovered; } catch (error) { if (!opts?.quiet) { log.warn(`Failed to discover Ollama models: ${String(error)}`); diff --git a/src/agents/models-config.ts b/src/agents/models-config.ts index 3b02737eb..b7b94bff3 100644 --- a/src/agents/models-config.ts +++ b/src/agents/models-config.ts @@ -15,6 +15,12 @@ type ModelsConfig = NonNullable; const DEFAULT_MODE: NonNullable = "merge"; +function resolvePreferredTokenLimit(explicitValue: number, implicitValue: number): number { + // Keep catalog refresh behavior for stale low values while preserving + // intentional larger user overrides (for example Ollama >128k contexts). + return explicitValue > implicitValue ? explicitValue : implicitValue; +} + function mergeProviderModels(implicit: ProviderConfig, explicit: ProviderConfig): ProviderConfig { const implicitModels = Array.isArray(implicit.models) ? implicit.models : []; const explicitModels = Array.isArray(explicit.models) ? explicit.models : []; @@ -55,8 +61,11 @@ function mergeProviderModels(implicit: ProviderConfig, explicit: ProviderConfig) ...explicitModel, input: implicitModel.input, reasoning: "reasoning" in explicitModel ? explicitModel.reasoning : implicitModel.reasoning, - contextWindow: implicitModel.contextWindow, - maxTokens: implicitModel.maxTokens, + contextWindow: resolvePreferredTokenLimit( + explicitModel.contextWindow, + implicitModel.contextWindow, + ), + maxTokens: resolvePreferredTokenLimit(explicitModel.maxTokens, implicitModel.maxTokens), }; }); diff --git a/src/agents/pi-embedded-runner/model.test.ts b/src/agents/pi-embedded-runner/model.test.ts index f0fb13426..7a5918a11 100644 --- a/src/agents/pi-embedded-runner/model.test.ts +++ b/src/agents/pi-embedded-runner/model.test.ts @@ -171,6 +171,35 @@ describe("resolveModel", () => { expect(result.model?.id).toBe("missing-model"); }); + it("prefers matching configured model metadata for fallback token limits", () => { + const cfg = { + models: { + providers: { + custom: { + baseUrl: "http://localhost:9000", + models: [ + { + ...makeModel("model-a"), + contextWindow: 4096, + maxTokens: 1024, + }, + { + ...makeModel("model-b"), + contextWindow: 262144, + maxTokens: 32768, + }, + ], + }, + }, + }, + } as OpenClawConfig; + + const result = resolveModel("custom", "model-b", "/tmp/agent", cfg); + + expect(result.model?.contextWindow).toBe(262144); + expect(result.model?.maxTokens).toBe(32768); + }); + it("builds an openai-codex fallback for gpt-5.3-codex", () => { mockOpenAICodexTemplateModel(); diff --git a/src/agents/pi-embedded-runner/model.ts b/src/agents/pi-embedded-runner/model.ts index 16aea8b4c..313c5f552 100644 --- a/src/agents/pi-embedded-runner/model.ts +++ b/src/agents/pi-embedded-runner/model.ts @@ -96,6 +96,7 @@ export function resolveModel( } const providerCfg = providers[provider]; if (providerCfg || modelId.startsWith("mock-")) { + const configuredModel = providerCfg?.models?.find((candidate) => candidate.id === modelId); const fallbackModel: Model = normalizeModelCompat({ id: modelId, name: modelId, @@ -105,8 +106,14 @@ export function resolveModel( reasoning: false, input: ["text"], cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, - contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS, - maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS, + contextWindow: + configuredModel?.contextWindow ?? + providerCfg?.models?.[0]?.contextWindow ?? + DEFAULT_CONTEXT_TOKENS, + maxTokens: + configuredModel?.maxTokens ?? + providerCfg?.models?.[0]?.maxTokens ?? + DEFAULT_CONTEXT_TOKENS, } as Model); return { model: fallbackModel, authStorage, modelRegistry }; } diff --git a/src/agents/pi-embedded-runner/run/attempt.test.ts b/src/agents/pi-embedded-runner/run/attempt.test.ts index 7f2a05b02..cb83508ab 100644 --- a/src/agents/pi-embedded-runner/run/attempt.test.ts +++ b/src/agents/pi-embedded-runner/run/attempt.test.ts @@ -1,9 +1,13 @@ import { describe, expect, it, vi } from "vitest"; import type { OpenClawConfig } from "../../../config/config.js"; import { + isOllamaCompatProvider, resolveAttemptFsWorkspaceOnly, + resolveOllamaCompatNumCtxEnabled, resolvePromptBuildHookResult, resolvePromptModeForSession, + shouldInjectOllamaCompatNumCtx, + wrapOllamaCompatNumCtx, wrapStreamFnTrimToolCallNames, } from "./attempt.js"; @@ -174,3 +178,159 @@ describe("wrapStreamFnTrimToolCallNames", () => { expect(baseFn).toHaveBeenCalledTimes(1); }); }); + +describe("isOllamaCompatProvider", () => { + it("detects native ollama provider id", () => { + expect( + isOllamaCompatProvider({ + provider: "ollama", + api: "openai-completions", + baseUrl: "https://example.com/v1", + }), + ).toBe(true); + }); + + it("detects localhost Ollama OpenAI-compatible endpoint", () => { + expect( + isOllamaCompatProvider({ + provider: "custom", + api: "openai-completions", + baseUrl: "http://127.0.0.1:11434/v1", + }), + ).toBe(true); + }); + + it("does not misclassify non-local OpenAI-compatible providers", () => { + expect( + isOllamaCompatProvider({ + provider: "custom", + api: "openai-completions", + baseUrl: "https://api.openrouter.ai/v1", + }), + ).toBe(false); + }); + + it("detects remote Ollama-compatible endpoint when provider id hints ollama", () => { + expect( + isOllamaCompatProvider({ + provider: "my-ollama", + api: "openai-completions", + baseUrl: "http://ollama-host:11434/v1", + }), + ).toBe(true); + }); + + it("detects IPv6 loopback Ollama OpenAI-compatible endpoint", () => { + expect( + isOllamaCompatProvider({ + provider: "custom", + api: "openai-completions", + baseUrl: "http://[::1]:11434/v1", + }), + ).toBe(true); + }); + + it("does not classify arbitrary remote hosts on 11434 without ollama provider hint", () => { + expect( + isOllamaCompatProvider({ + provider: "custom", + api: "openai-completions", + baseUrl: "http://example.com:11434/v1", + }), + ).toBe(false); + }); +}); + +describe("wrapOllamaCompatNumCtx", () => { + it("injects num_ctx and preserves downstream onPayload hooks", () => { + let payloadSeen: Record | undefined; + const baseFn = vi.fn((_model, _context, options) => { + const payload: Record = { options: { temperature: 0.1 } }; + options?.onPayload?.(payload); + payloadSeen = payload; + return {} as never; + }); + const downstream = vi.fn(); + + const wrapped = wrapOllamaCompatNumCtx(baseFn as never, 202752); + void wrapped({} as never, {} as never, { onPayload: downstream } as never); + + expect(baseFn).toHaveBeenCalledTimes(1); + expect((payloadSeen?.options as Record | undefined)?.num_ctx).toBe(202752); + expect(downstream).toHaveBeenCalledTimes(1); + }); +}); + +describe("resolveOllamaCompatNumCtxEnabled", () => { + it("defaults to true when config is missing", () => { + expect(resolveOllamaCompatNumCtxEnabled({ providerId: "ollama" })).toBe(true); + }); + + it("defaults to true when provider config is missing", () => { + expect( + resolveOllamaCompatNumCtxEnabled({ + config: { models: { providers: {} } }, + providerId: "ollama", + }), + ).toBe(true); + }); + + it("returns false when provider flag is explicitly disabled", () => { + expect( + resolveOllamaCompatNumCtxEnabled({ + config: { + models: { + providers: { + ollama: { + baseUrl: "http://127.0.0.1:11434/v1", + api: "openai-completions", + injectNumCtxForOpenAICompat: false, + models: [], + }, + }, + }, + }, + providerId: "ollama", + }), + ).toBe(false); + }); +}); + +describe("shouldInjectOllamaCompatNumCtx", () => { + it("requires openai-completions adapter", () => { + expect( + shouldInjectOllamaCompatNumCtx({ + model: { + provider: "ollama", + api: "openai-responses", + baseUrl: "http://127.0.0.1:11434/v1", + }, + }), + ).toBe(false); + }); + + it("respects provider flag disablement", () => { + expect( + shouldInjectOllamaCompatNumCtx({ + model: { + provider: "ollama", + api: "openai-completions", + baseUrl: "http://127.0.0.1:11434/v1", + }, + config: { + models: { + providers: { + ollama: { + baseUrl: "http://127.0.0.1:11434/v1", + api: "openai-completions", + injectNumCtxForOpenAICompat: false, + models: [], + }, + }, + }, + }, + providerId: "ollama", + }), + ).toBe(false); + }); +}); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 08706eb57..035a84ba0 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -40,7 +40,7 @@ import { resolveOpenClawDocsPath } from "../../docs-path.js"; import { isTimeoutError } from "../../failover-error.js"; import { resolveImageSanitizationLimits } from "../../image-sanitization.js"; import { resolveModelAuthMode } from "../../model-auth.js"; -import { resolveDefaultModelForAgent } from "../../model-selection.js"; +import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js"; import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js"; import { resolveOwnerDisplaySetting } from "../../owner-display.js"; import { @@ -127,6 +127,104 @@ type PromptBuildHookRunner = { ) => Promise; }; +export function isOllamaCompatProvider(model: { + provider?: string; + baseUrl?: string; + api?: string; +}): boolean { + const providerId = normalizeProviderId(model.provider ?? ""); + if (providerId === "ollama") { + return true; + } + if (!model.baseUrl) { + return false; + } + try { + const parsed = new URL(model.baseUrl); + const hostname = parsed.hostname.toLowerCase(); + const isLocalhost = + hostname === "localhost" || + hostname === "127.0.0.1" || + hostname === "::1" || + hostname === "[::1]"; + if (isLocalhost && parsed.port === "11434") { + return true; + } + + // Allow remote/LAN Ollama OpenAI-compatible endpoints when the provider id + // itself indicates Ollama usage (e.g. "my-ollama"). + const providerHintsOllama = providerId.includes("ollama"); + const isOllamaPort = parsed.port === "11434"; + const isOllamaCompatPath = parsed.pathname === "/" || /^\/v1\/?$/i.test(parsed.pathname); + return providerHintsOllama && isOllamaPort && isOllamaCompatPath; + } catch { + return false; + } +} + +export function resolveOllamaCompatNumCtxEnabled(params: { + config?: OpenClawConfig; + providerId?: string; +}): boolean { + const providerId = params.providerId?.trim(); + if (!providerId) { + return true; + } + const providers = params.config?.models?.providers; + if (!providers) { + return true; + } + const direct = providers[providerId]; + if (direct) { + return direct.injectNumCtxForOpenAICompat ?? true; + } + const normalized = normalizeProviderId(providerId); + for (const [candidateId, candidate] of Object.entries(providers)) { + if (normalizeProviderId(candidateId) === normalized) { + return candidate.injectNumCtxForOpenAICompat ?? true; + } + } + return true; +} + +export function shouldInjectOllamaCompatNumCtx(params: { + model: { api?: string; provider?: string; baseUrl?: string }; + config?: OpenClawConfig; + providerId?: string; +}): boolean { + // Restrict to the OpenAI-compatible adapter path only. + if (params.model.api !== "openai-completions") { + return false; + } + if (!isOllamaCompatProvider(params.model)) { + return false; + } + return resolveOllamaCompatNumCtxEnabled({ + config: params.config, + providerId: params.providerId, + }); +} + +export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: number): StreamFn { + const streamFn = baseFn ?? streamSimple; + return (model, context, options) => + streamFn(model, context, { + ...options, + onPayload: (payload: unknown) => { + if (!payload || typeof payload !== "object") { + options?.onPayload?.(payload); + return; + } + const payloadRecord = payload as Record; + if (!payloadRecord.options || typeof payloadRecord.options !== "object") { + payloadRecord.options = {}; + } + (payloadRecord.options as Record).num_ctx = numCtx; + options?.onPayload?.(payload); + }, + }); +} + function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void { if (!message || typeof message !== "object") { return; @@ -773,6 +871,27 @@ export async function runEmbeddedAttempt( activeSession.agent.streamFn = streamSimple; } + // Ollama with OpenAI-compatible API needs num_ctx in payload.options. + // Otherwise Ollama defaults to a 4096 context window. + const providerIdForNumCtx = + typeof params.model.provider === "string" && params.model.provider.trim().length > 0 + ? params.model.provider + : params.provider; + const shouldInjectNumCtx = shouldInjectOllamaCompatNumCtx({ + model: params.model, + config: params.config, + providerId: providerIdForNumCtx, + }); + if (shouldInjectNumCtx) { + const numCtx = Math.max( + 1, + Math.floor( + params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS, + ), + ); + activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx); + } + applyExtraParamsToAgent( activeSession.agent, params.config, diff --git a/src/config/schema.help.ts b/src/config/schema.help.ts index e5ec1ad41..ef2e06cbe 100644 --- a/src/config/schema.help.ts +++ b/src/config/schema.help.ts @@ -630,7 +630,7 @@ export const FIELD_HELP: Record = { models: "Model catalog root for provider definitions, merge/replace behavior, and optional Bedrock discovery integration. Keep provider definitions explicit and validated before relying on production failover paths.", "models.mode": - 'Controls provider catalog behavior: "merge" keeps built-ins and overlays your custom providers, while "replace" uses only your configured providers. In "merge", matching provider IDs preserve non-empty agent models.json apiKey/baseUrl values and fall back to config when agent values are empty or missing.', + 'Controls provider catalog behavior: "merge" keeps built-ins and overlays your custom providers, while "replace" uses only your configured providers. In "merge", matching provider IDs preserve non-empty agent models.json apiKey/baseUrl values and fall back to config when agent values are empty or missing; matching model contextWindow/maxTokens use the higher value between explicit and implicit entries.', "models.providers": "Provider map keyed by provider ID containing connection/auth settings and concrete model definitions. Use stable provider keys so references from agents and tooling remain portable across environments.", "models.providers.*.baseUrl": @@ -641,6 +641,8 @@ export const FIELD_HELP: Record = { 'Selects provider auth style: "api-key" for API key auth, "token" for bearer token auth, "oauth" for OAuth credentials, and "aws-sdk" for AWS credential resolution. Match this to your provider requirements.', "models.providers.*.api": "Provider API adapter selection controlling request/response compatibility handling for model calls. Use the adapter that matches your upstream provider protocol to avoid feature mismatch.", + "models.providers.*.injectNumCtxForOpenAICompat": + "Controls whether OpenClaw injects `options.num_ctx` for Ollama providers configured with the OpenAI-compatible adapter (`openai-completions`). Default is true. Set false only if your proxy/upstream rejects unknown `options` payload fields.", "models.providers.*.headers": "Static HTTP headers merged into provider requests for tenant routing, proxy auth, or custom gateway requirements. Use this sparingly and keep sensitive header values in secrets.", "models.providers.*.authHeader": diff --git a/src/config/schema.labels.ts b/src/config/schema.labels.ts index 4ded77e83..7005613b6 100644 --- a/src/config/schema.labels.ts +++ b/src/config/schema.labels.ts @@ -378,6 +378,7 @@ export const FIELD_LABELS: Record = { "models.providers.*.apiKey": "Model Provider API Key", "models.providers.*.auth": "Model Provider Auth Mode", "models.providers.*.api": "Model Provider API Adapter", + "models.providers.*.injectNumCtxForOpenAICompat": "Model Provider Inject num_ctx (OpenAI Compat)", "models.providers.*.headers": "Model Provider Headers", "models.providers.*.authHeader": "Model Provider Authorization Header", "models.providers.*.models": "Model Provider Model List", diff --git a/src/config/types.models.ts b/src/config/types.models.ts index 252e635e8..6e7e9efe5 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -52,6 +52,7 @@ export type ModelProviderConfig = { apiKey?: SecretInput; auth?: ModelProviderAuthMode; api?: ModelApi; + injectNumCtxForOpenAICompat?: boolean; headers?: Record; authHeader?: boolean; models: ModelDefinitionConfig[]; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 711faf5e9..eca825698 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -232,6 +232,7 @@ export const ModelProviderSchema = z .union([z.literal("api-key"), z.literal("aws-sdk"), z.literal("oauth"), z.literal("token")]) .optional(), api: ModelApiSchema.optional(), + injectNumCtxForOpenAICompat: z.boolean().optional(), headers: z.record(z.string(), z.string()).optional(), authHeader: z.boolean().optional(), models: z.array(ModelDefinitionSchema),