fix(onboard): increase verification timeout and reduce max_tokens for custom provider probes (#27380)
* fix(onboard): increase verification timeout and reduce max_tokens for custom provider probes

The onboard wizard sends a chat-completion request to verify custom providers. With max_tokens: 1024 and a 10 s timeout, large local models (e.g. Qwen3.5-27B on llama.cpp) routinely time out because the server needs to load the model and generate up to 1024 tokens before responding.

Changes:
- Raise VERIFY_TIMEOUT_MS from 10 s to 30 s
- Lower max_tokens from 1024 to 1 (verification only needs a single token to confirm the API is reachable and the model ID is valid)
- Add explicit stream: false to both OpenAI and Anthropic probes

Closes #27346

Made-with: Cursor

* Changelog: note custom-provider onboarding verification fix

---------

Co-authored-by: Philipp Spiess <hello@philippspiess.com>
This commit is contained in:
@@ -128,7 +128,7 @@ describe("promptCustomApiConfig", () => {
     const firstCall = fetchMock.mock.calls[0]?.[1] as { body?: string } | undefined;
     expect(firstCall?.body).toBeDefined();
-    expect(JSON.parse(firstCall?.body ?? "{}")).toMatchObject({ max_tokens: 1024 });
+    expect(JSON.parse(firstCall?.body ?? "{}")).toMatchObject({ max_tokens: 1 });
   });

   it("uses expanded max_tokens for anthropic verification probes", async () => {
@@ -143,7 +143,7 @@ describe("promptCustomApiConfig", () => {
     expect(fetchMock).toHaveBeenCalledTimes(2);
     const secondCall = fetchMock.mock.calls[1]?.[1] as { body?: string } | undefined;
     expect(secondCall?.body).toBeDefined();
-    expect(JSON.parse(secondCall?.body ?? "{}")).toMatchObject({ max_tokens: 1024 });
+    expect(JSON.parse(secondCall?.body ?? "{}")).toMatchObject({ max_tokens: 1 });
   });

   it("re-prompts base url when unknown detection fails", async () => {
@@ -220,7 +220,7 @@ describe("promptCustomApiConfig", () => {

     const promise = runPromptCustomApi(prompter);

-    await vi.advanceTimersByTimeAsync(10000);
+    await vi.advanceTimersByTimeAsync(30_000);
     await promise;

     expect(prompter.text).toHaveBeenCalledTimes(6);
@@ -18,7 +18,7 @@ import type { SecretInputMode } from "./onboard-types.js";
 const DEFAULT_OLLAMA_BASE_URL = "http://127.0.0.1:11434/v1";
 const DEFAULT_CONTEXT_WINDOW = 4096;
 const DEFAULT_MAX_TOKENS = 4096;
-const VERIFY_TIMEOUT_MS = 10000;
+const VERIFY_TIMEOUT_MS = 30_000;

 /**
  * Detects if a URL is from Azure AI Foundry or Azure OpenAI.
@@ -317,7 +317,8 @@ async function requestOpenAiVerification(params: {
     body: {
       model: params.modelId,
       messages: [{ role: "user", content: "Hi" }],
-      max_tokens: 1024,
+      max_tokens: 1,
+      stream: false,
     },
   });
 }
@@ -343,8 +344,9 @@ async function requestAnthropicVerification(params: {
     headers: buildAnthropicHeaders(params.apiKey),
     body: {
       model: params.modelId,
-      max_tokens: 1024,
+      max_tokens: 1,
       messages: [{ role: "user", content: "Hi" }],
+      stream: false,
     },
   });
 }
Reference in New Issue
Block a user