fix: clean up lint + guardCancel typing
This commit is contained in:
@@ -52,6 +52,8 @@
|
|||||||
|
|
||||||
### Fixes
|
### Fixes
|
||||||
- Models/Onboarding: configure MiniMax (minimax.io) via Anthropic-compatible `/anthropic` endpoint by default (keep `minimax-api` as a legacy alias).
|
- Models/Onboarding: configure MiniMax (minimax.io) via Anthropic-compatible `/anthropic` endpoint by default (keep `minimax-api` as a legacy alias).
|
||||||
|
- Models: normalize Gemini 3 Pro/Flash IDs to preview names for live model lookups. (#769) — thanks @steipete.
|
||||||
|
- CLI: fix guardCancel typing for configure prompts. (#769) — thanks @steipete.
|
||||||
- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging; preserve close codes.
|
- Gateway/WebChat: include handshake validation details in the WebSocket close reason for easier debugging; preserve close codes.
|
||||||
- Gateway/Auth: send invalid connect responses before closing the handshake; stabilize invalid-connect auth test.
|
- Gateway/Auth: send invalid connect responses before closing the handshake; stabilize invalid-connect auth test.
|
||||||
- Gateway: tighten gateway listener detection.
|
- Gateway: tighten gateway listener detection.
|
||||||
|
|||||||
@@ -166,10 +166,7 @@ describe("models config", () => {
|
|||||||
providers: Record<string, { models: Array<{ id: string }> }>;
|
providers: Record<string, { models: Array<{ id: string }> }>;
|
||||||
};
|
};
|
||||||
const ids = parsed.providers.google?.models?.map((model) => model.id);
|
const ids = parsed.providers.google?.models?.map((model) => model.id);
|
||||||
expect(ids).toEqual([
|
expect(ids).toEqual(["gemini-3-pro-preview", "gemini-3-flash-preview"]);
|
||||||
"gemini-3-pro-preview",
|
|
||||||
"gemini-3-flash-preview",
|
|
||||||
]);
|
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|||||||
@@ -151,9 +151,6 @@ describeLive("live models (profile keys)", () => {
|
|||||||
const authStorage = discoverAuthStorage(agentDir);
|
const authStorage = discoverAuthStorage(agentDir);
|
||||||
const modelRegistry = discoverModels(authStorage, agentDir);
|
const modelRegistry = discoverModels(authStorage, agentDir);
|
||||||
const models = modelRegistry.getAll() as Array<Model<Api>>;
|
const models = modelRegistry.getAll() as Array<Model<Api>>;
|
||||||
const modelByKey = new Map(
|
|
||||||
models.map((model) => [`${model.provider}/${model.id}`, model]),
|
|
||||||
);
|
|
||||||
|
|
||||||
const rawModels = process.env.CLAWDBOT_LIVE_MODELS?.trim();
|
const rawModels = process.env.CLAWDBOT_LIVE_MODELS?.trim();
|
||||||
const useModern = rawModels === "modern" || rawModels === "all";
|
const useModern = rawModels === "modern" || rawModels === "all";
|
||||||
@@ -348,10 +345,15 @@ describeLive("live models (profile keys)", () => {
|
|||||||
isAnthropicRateLimitError(message) &&
|
isAnthropicRateLimitError(message) &&
|
||||||
attempt + 1 < attemptMax
|
attempt + 1 < attemptMax
|
||||||
) {
|
) {
|
||||||
logProgress(`${progressLabel}: rate limit, retrying with next key`);
|
logProgress(
|
||||||
|
`${progressLabel}: rate limit, retrying with next key`,
|
||||||
|
);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (model.provider === "google" && isGoogleModelNotFoundError(err)) {
|
if (
|
||||||
|
model.provider === "google" &&
|
||||||
|
isGoogleModelNotFoundError(err)
|
||||||
|
) {
|
||||||
skipped.push({ model: id, reason: message });
|
skipped.push({ model: id, reason: message });
|
||||||
logProgress(`${progressLabel}: skip (google model not found)`);
|
logProgress(`${progressLabel}: skip (google model not found)`);
|
||||||
break;
|
break;
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ import {
|
|||||||
discoverAuthStorage,
|
discoverAuthStorage,
|
||||||
discoverModels,
|
discoverModels,
|
||||||
} from "@mariozechner/pi-coding-agent";
|
} from "@mariozechner/pi-coding-agent";
|
||||||
import { describe, expect, it } from "vitest";
|
import { describe, it } from "vitest";
|
||||||
import { resolveClawdbotAgentDir } from "../agents/agent-paths.js";
|
import { resolveClawdbotAgentDir } from "../agents/agent-paths.js";
|
||||||
import {
|
import {
|
||||||
collectAnthropicApiKeys,
|
collectAnthropicApiKeys,
|
||||||
@@ -34,8 +34,7 @@ const GATEWAY_LIVE = process.env.CLAWDBOT_LIVE_GATEWAY === "1";
|
|||||||
const ZAI_FALLBACK = process.env.CLAWDBOT_LIVE_GATEWAY_ZAI_FALLBACK === "1";
|
const ZAI_FALLBACK = process.env.CLAWDBOT_LIVE_GATEWAY_ZAI_FALLBACK === "1";
|
||||||
const PROVIDERS = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_PROVIDERS);
|
const PROVIDERS = parseFilter(process.env.CLAWDBOT_LIVE_GATEWAY_PROVIDERS);
|
||||||
const THINKING_LEVEL = "high";
|
const THINKING_LEVEL = "high";
|
||||||
const THINKING_TAG_RE =
|
const THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
|
||||||
/<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\s*>/i;
|
|
||||||
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/i;
|
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/i;
|
||||||
|
|
||||||
const describeLive = LIVE || GATEWAY_LIVE ? describe : describe.skip;
|
const describeLive = LIVE || GATEWAY_LIVE ? describe : describe.skip;
|
||||||
@@ -286,7 +285,11 @@ function buildMinimaxProviderOverride(params: {
|
|||||||
baseUrl: string;
|
baseUrl: string;
|
||||||
}): ModelProviderConfig | null {
|
}): ModelProviderConfig | null {
|
||||||
const existing = params.cfg.models?.providers?.minimax;
|
const existing = params.cfg.models?.providers?.minimax;
|
||||||
if (!existing || !Array.isArray(existing.models) || existing.models.length === 0)
|
if (
|
||||||
|
!existing ||
|
||||||
|
!Array.isArray(existing.models) ||
|
||||||
|
existing.models.length === 0
|
||||||
|
)
|
||||||
return null;
|
return null;
|
||||||
return {
|
return {
|
||||||
...existing,
|
...existing,
|
||||||
@@ -356,7 +359,9 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
|||||||
const anthropicKeys = collectAnthropicApiKeys();
|
const anthropicKeys = collectAnthropicApiKeys();
|
||||||
if (anthropicKeys.length > 0) {
|
if (anthropicKeys.length > 0) {
|
||||||
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
|
process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
|
||||||
logProgress(`[${params.label}] anthropic keys loaded: ${anthropicKeys.length}`);
|
logProgress(
|
||||||
|
`[${params.label}] anthropic keys loaded: ${anthropicKeys.length}`,
|
||||||
|
);
|
||||||
}
|
}
|
||||||
const sessionKey = `agent:dev:${params.label}`;
|
const sessionKey = `agent:dev:${params.label}`;
|
||||||
const failures: Array<{ model: string; error: string }> = [];
|
const failures: Array<{ model: string; error: string }> = [];
|
||||||
@@ -387,231 +392,235 @@ async function runGatewayModelSuite(params: GatewayModelSuiteParams) {
|
|||||||
key: sessionKey,
|
key: sessionKey,
|
||||||
});
|
});
|
||||||
|
|
||||||
logProgress(`${progressLabel}: prompt`);
|
logProgress(`${progressLabel}: prompt`);
|
||||||
const runId = randomUUID();
|
const runId = randomUUID();
|
||||||
const payload = await client.request<AgentFinalPayload>(
|
const payload = await client.request<AgentFinalPayload>(
|
||||||
"agent",
|
|
||||||
{
|
|
||||||
sessionKey,
|
|
||||||
idempotencyKey: `idem-${runId}`,
|
|
||||||
message:
|
|
||||||
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
|
|
||||||
thinking: params.thinkingLevel,
|
|
||||||
deliver: false,
|
|
||||||
},
|
|
||||||
{ expectFinal: true },
|
|
||||||
);
|
|
||||||
|
|
||||||
if (payload?.status !== "ok") {
|
|
||||||
throw new Error(`agent status=${String(payload?.status)}`);
|
|
||||||
}
|
|
||||||
const text = extractPayloadText(payload?.result);
|
|
||||||
if (model.provider === "google" && isGoogleModelNotFoundText(text)) {
|
|
||||||
// Catalog drift: model IDs can disappear or become unavailable on the API.
|
|
||||||
// Treat as skip when scanning "all models" for Google.
|
|
||||||
logProgress(`${progressLabel}: skip (google model not found)`);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
assertNoReasoningTags({
|
|
||||||
text,
|
|
||||||
model: modelKey,
|
|
||||||
phase: "prompt",
|
|
||||||
label: params.label,
|
|
||||||
});
|
|
||||||
if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`);
|
|
||||||
if (
|
|
||||||
!/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
|
|
||||||
!/\bmacro\s*-?\s*tasks?\b/i.test(text)
|
|
||||||
) {
|
|
||||||
throw new Error(`missing required keywords: ${text}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Real tool invocation: force the agent to Read a local file and echo a nonce.
|
|
||||||
logProgress(`${progressLabel}: tool-read`);
|
|
||||||
const runIdTool = randomUUID();
|
|
||||||
const toolProbe = await client.request<AgentFinalPayload>(
|
|
||||||
"agent",
|
|
||||||
{
|
|
||||||
sessionKey,
|
|
||||||
idempotencyKey: `idem-${runIdTool}-tool`,
|
|
||||||
message:
|
|
||||||
"Clawdbot live tool probe (local, safe): " +
|
|
||||||
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
|
||||||
"Then reply with the two nonce values you read (include both).",
|
|
||||||
thinking: params.thinkingLevel,
|
|
||||||
deliver: false,
|
|
||||||
},
|
|
||||||
{ expectFinal: true },
|
|
||||||
);
|
|
||||||
if (toolProbe?.status !== "ok") {
|
|
||||||
throw new Error(`tool probe failed: status=${String(toolProbe?.status)}`);
|
|
||||||
}
|
|
||||||
const toolText = extractPayloadText(toolProbe?.result);
|
|
||||||
assertNoReasoningTags({
|
|
||||||
text: toolText,
|
|
||||||
model: modelKey,
|
|
||||||
phase: "tool-read",
|
|
||||||
label: params.label,
|
|
||||||
});
|
|
||||||
if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
|
|
||||||
throw new Error(`tool probe missing nonce: ${toolText}`);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (params.extraToolProbes) {
|
|
||||||
logProgress(`${progressLabel}: tool-exec`);
|
|
||||||
const nonceC = randomUUID();
|
|
||||||
const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
|
|
||||||
|
|
||||||
const execReadProbe = await client.request<AgentFinalPayload>(
|
|
||||||
"agent",
|
"agent",
|
||||||
{
|
{
|
||||||
sessionKey,
|
sessionKey,
|
||||||
idempotencyKey: `idem-${runIdTool}-exec-read`,
|
idempotencyKey: `idem-${runId}`,
|
||||||
|
message:
|
||||||
|
"Explain in 2-3 sentences how the JavaScript event loop handles microtasks vs macrotasks. Must mention both words: microtask and macrotask.",
|
||||||
|
thinking: params.thinkingLevel,
|
||||||
|
deliver: false,
|
||||||
|
},
|
||||||
|
{ expectFinal: true },
|
||||||
|
);
|
||||||
|
|
||||||
|
if (payload?.status !== "ok") {
|
||||||
|
throw new Error(`agent status=${String(payload?.status)}`);
|
||||||
|
}
|
||||||
|
const text = extractPayloadText(payload?.result);
|
||||||
|
if (model.provider === "google" && isGoogleModelNotFoundText(text)) {
|
||||||
|
// Catalog drift: model IDs can disappear or become unavailable on the API.
|
||||||
|
// Treat as skip when scanning "all models" for Google.
|
||||||
|
logProgress(`${progressLabel}: skip (google model not found)`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
assertNoReasoningTags({
|
||||||
|
text,
|
||||||
|
model: modelKey,
|
||||||
|
phase: "prompt",
|
||||||
|
label: params.label,
|
||||||
|
});
|
||||||
|
if (!isMeaningful(text)) throw new Error(`not meaningful: ${text}`);
|
||||||
|
if (
|
||||||
|
!/\bmicro\s*-?\s*tasks?\b/i.test(text) ||
|
||||||
|
!/\bmacro\s*-?\s*tasks?\b/i.test(text)
|
||||||
|
) {
|
||||||
|
throw new Error(`missing required keywords: ${text}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Real tool invocation: force the agent to Read a local file and echo a nonce.
|
||||||
|
logProgress(`${progressLabel}: tool-read`);
|
||||||
|
const runIdTool = randomUUID();
|
||||||
|
const toolProbe = await client.request<AgentFinalPayload>(
|
||||||
|
"agent",
|
||||||
|
{
|
||||||
|
sessionKey,
|
||||||
|
idempotencyKey: `idem-${runIdTool}-tool`,
|
||||||
message:
|
message:
|
||||||
"Clawdbot live tool probe (local, safe): " +
|
"Clawdbot live tool probe (local, safe): " +
|
||||||
"use the tool named `exec` (or `Exec`) to run this command: " +
|
`use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolProbePath}"}. ` +
|
||||||
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
"Then reply with the two nonce values you read (include both).",
|
||||||
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
|
||||||
"Finally reply including the nonce text you read back.",
|
|
||||||
thinking: params.thinkingLevel,
|
thinking: params.thinkingLevel,
|
||||||
deliver: false,
|
deliver: false,
|
||||||
},
|
},
|
||||||
{ expectFinal: true },
|
{ expectFinal: true },
|
||||||
);
|
);
|
||||||
if (execReadProbe?.status !== "ok") {
|
if (toolProbe?.status !== "ok") {
|
||||||
throw new Error(
|
throw new Error(
|
||||||
`exec+read probe failed: status=${String(execReadProbe?.status)}`,
|
`tool probe failed: status=${String(toolProbe?.status)}`,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
const execReadText = extractPayloadText(execReadProbe?.result);
|
const toolText = extractPayloadText(toolProbe?.result);
|
||||||
assertNoReasoningTags({
|
assertNoReasoningTags({
|
||||||
text: execReadText,
|
text: toolText,
|
||||||
model: modelKey,
|
model: modelKey,
|
||||||
phase: "tool-exec",
|
phase: "tool-read",
|
||||||
label: params.label,
|
label: params.label,
|
||||||
});
|
});
|
||||||
if (!execReadText.includes(nonceC)) {
|
if (!toolText.includes(nonceA) || !toolText.includes(nonceB)) {
|
||||||
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
throw new Error(`tool probe missing nonce: ${toolText}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
await fs.rm(toolWritePath, { force: true });
|
if (params.extraToolProbes) {
|
||||||
}
|
logProgress(`${progressLabel}: tool-exec`);
|
||||||
|
const nonceC = randomUUID();
|
||||||
|
const toolWritePath = path.join(tempDir, `write-${runIdTool}.txt`);
|
||||||
|
|
||||||
if (params.extraImageProbes && model.input?.includes("image")) {
|
const execReadProbe = await client.request<AgentFinalPayload>(
|
||||||
logProgress(`${progressLabel}: image`);
|
"agent",
|
||||||
const imageCode = randomImageProbeCode(10);
|
{
|
||||||
const imageBase64 = renderCatNoncePngBase64(imageCode);
|
sessionKey,
|
||||||
const runIdImage = randomUUID();
|
idempotencyKey: `idem-${runIdTool}-exec-read`,
|
||||||
|
message:
|
||||||
const imageProbe = await client.request<AgentFinalPayload>(
|
"Clawdbot live tool probe (local, safe): " +
|
||||||
"agent",
|
"use the tool named `exec` (or `Exec`) to run this command: " +
|
||||||
{
|
`mkdir -p "${tempDir}" && printf '%s' '${nonceC}' > "${toolWritePath}". ` +
|
||||||
sessionKey,
|
`Then use the tool named \`read\` (or \`Read\`) with JSON arguments {"path":"${toolWritePath}"}. ` +
|
||||||
idempotencyKey: `idem-${runIdImage}-image`,
|
"Finally reply including the nonce text you read back.",
|
||||||
message:
|
thinking: params.thinkingLevel,
|
||||||
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
|
deliver: false,
|
||||||
"(1) the animal shown or written in the image, lowercase; " +
|
},
|
||||||
"(2) the code printed in the image, uppercase. No extra text.",
|
{ expectFinal: true },
|
||||||
attachments: [
|
|
||||||
{
|
|
||||||
mimeType: "image/png",
|
|
||||||
fileName: `probe-${runIdImage}.png`,
|
|
||||||
content: imageBase64,
|
|
||||||
},
|
|
||||||
],
|
|
||||||
thinking: params.thinkingLevel,
|
|
||||||
deliver: false,
|
|
||||||
},
|
|
||||||
{ expectFinal: true },
|
|
||||||
);
|
|
||||||
if (imageProbe?.status !== "ok") {
|
|
||||||
throw new Error(
|
|
||||||
`image probe failed: status=${String(imageProbe?.status)}`,
|
|
||||||
);
|
);
|
||||||
|
if (execReadProbe?.status !== "ok") {
|
||||||
|
throw new Error(
|
||||||
|
`exec+read probe failed: status=${String(execReadProbe?.status)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const execReadText = extractPayloadText(execReadProbe?.result);
|
||||||
|
assertNoReasoningTags({
|
||||||
|
text: execReadText,
|
||||||
|
model: modelKey,
|
||||||
|
phase: "tool-exec",
|
||||||
|
label: params.label,
|
||||||
|
});
|
||||||
|
if (!execReadText.includes(nonceC)) {
|
||||||
|
throw new Error(`exec+read probe missing nonce: ${execReadText}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
await fs.rm(toolWritePath, { force: true });
|
||||||
}
|
}
|
||||||
const imageText = extractPayloadText(imageProbe?.result);
|
|
||||||
assertNoReasoningTags({
|
if (params.extraImageProbes && model.input?.includes("image")) {
|
||||||
text: imageText,
|
logProgress(`${progressLabel}: image`);
|
||||||
model: modelKey,
|
const imageCode = randomImageProbeCode(10);
|
||||||
phase: "image",
|
const imageBase64 = renderCatNoncePngBase64(imageCode);
|
||||||
label: params.label,
|
const runIdImage = randomUUID();
|
||||||
});
|
|
||||||
if (!/\bcat\b/i.test(imageText)) {
|
const imageProbe = await client.request<AgentFinalPayload>(
|
||||||
throw new Error(`image probe missing 'cat': ${imageText}`);
|
"agent",
|
||||||
}
|
{
|
||||||
const candidates =
|
sessionKey,
|
||||||
imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
|
idempotencyKey: `idem-${runIdImage}-image`,
|
||||||
const bestDistance = candidates.reduce((best, cand) => {
|
message:
|
||||||
if (Math.abs(cand.length - imageCode.length) > 2) return best;
|
"Look at the attached image. Reply with exactly two tokens separated by a single space: " +
|
||||||
return Math.min(best, editDistance(cand, imageCode));
|
"(1) the animal shown or written in the image, lowercase; " +
|
||||||
}, Number.POSITIVE_INFINITY);
|
"(2) the code printed in the image, uppercase. No extra text.",
|
||||||
if (!(bestDistance <= 2)) {
|
attachments: [
|
||||||
throw new Error(
|
{
|
||||||
`image probe missing code (${imageCode}): ${imageText}`,
|
mimeType: "image/png",
|
||||||
|
fileName: `probe-${runIdImage}.png`,
|
||||||
|
content: imageBase64,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
thinking: params.thinkingLevel,
|
||||||
|
deliver: false,
|
||||||
|
},
|
||||||
|
{ expectFinal: true },
|
||||||
);
|
);
|
||||||
|
if (imageProbe?.status !== "ok") {
|
||||||
|
throw new Error(
|
||||||
|
`image probe failed: status=${String(imageProbe?.status)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const imageText = extractPayloadText(imageProbe?.result);
|
||||||
|
assertNoReasoningTags({
|
||||||
|
text: imageText,
|
||||||
|
model: modelKey,
|
||||||
|
phase: "image",
|
||||||
|
label: params.label,
|
||||||
|
});
|
||||||
|
if (!/\bcat\b/i.test(imageText)) {
|
||||||
|
throw new Error(`image probe missing 'cat': ${imageText}`);
|
||||||
|
}
|
||||||
|
const candidates =
|
||||||
|
imageText.toUpperCase().match(/[A-Z0-9]{6,20}/g) ?? [];
|
||||||
|
const bestDistance = candidates.reduce((best, cand) => {
|
||||||
|
if (Math.abs(cand.length - imageCode.length) > 2) return best;
|
||||||
|
return Math.min(best, editDistance(cand, imageCode));
|
||||||
|
}, Number.POSITIVE_INFINITY);
|
||||||
|
if (!(bestDistance <= 2)) {
|
||||||
|
throw new Error(
|
||||||
|
`image probe missing code (${imageCode}): ${imageText}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
|
// Regression: tool-call-only turn followed by a user message (OpenAI responses bug class).
|
||||||
if (
|
if (
|
||||||
(model.provider === "openai" && model.api === "openai-responses") ||
|
(model.provider === "openai" && model.api === "openai-responses") ||
|
||||||
(model.provider === "openai-codex" &&
|
(model.provider === "openai-codex" &&
|
||||||
model.api === "openai-codex-responses")
|
model.api === "openai-codex-responses")
|
||||||
) {
|
) {
|
||||||
logProgress(`${progressLabel}: tool-only regression`);
|
logProgress(`${progressLabel}: tool-only regression`);
|
||||||
const runId2 = randomUUID();
|
const runId2 = randomUUID();
|
||||||
const first = await client.request<AgentFinalPayload>(
|
const first = await client.request<AgentFinalPayload>(
|
||||||
"agent",
|
"agent",
|
||||||
{
|
{
|
||||||
sessionKey,
|
sessionKey,
|
||||||
idempotencyKey: `idem-${runId2}-1`,
|
idempotencyKey: `idem-${runId2}-1`,
|
||||||
message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
|
message: `Call the tool named \`read\` (or \`Read\`) on "${toolProbePath}". Do not write any other text.`,
|
||||||
thinking: params.thinkingLevel,
|
thinking: params.thinkingLevel,
|
||||||
deliver: false,
|
deliver: false,
|
||||||
},
|
},
|
||||||
{ expectFinal: true },
|
{ expectFinal: true },
|
||||||
);
|
|
||||||
if (first?.status !== "ok") {
|
|
||||||
throw new Error(`tool-only turn failed: status=${String(first?.status)}`);
|
|
||||||
}
|
|
||||||
const firstText = extractPayloadText(first?.result);
|
|
||||||
assertNoReasoningTags({
|
|
||||||
text: firstText,
|
|
||||||
model: modelKey,
|
|
||||||
phase: "tool-only",
|
|
||||||
label: params.label,
|
|
||||||
});
|
|
||||||
|
|
||||||
const second = await client.request<AgentFinalPayload>(
|
|
||||||
"agent",
|
|
||||||
{
|
|
||||||
sessionKey,
|
|
||||||
idempotencyKey: `idem-${runId2}-2`,
|
|
||||||
message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
|
|
||||||
thinking: params.thinkingLevel,
|
|
||||||
deliver: false,
|
|
||||||
},
|
|
||||||
{ expectFinal: true },
|
|
||||||
);
|
|
||||||
if (second?.status !== "ok") {
|
|
||||||
throw new Error(
|
|
||||||
`post-tool message failed: status=${String(second?.status)}`,
|
|
||||||
);
|
);
|
||||||
}
|
if (first?.status !== "ok") {
|
||||||
const reply = extractPayloadText(second?.result);
|
throw new Error(
|
||||||
assertNoReasoningTags({
|
`tool-only turn failed: status=${String(first?.status)}`,
|
||||||
text: reply,
|
);
|
||||||
model: modelKey,
|
}
|
||||||
phase: "tool-only-followup",
|
const firstText = extractPayloadText(first?.result);
|
||||||
label: params.label,
|
assertNoReasoningTags({
|
||||||
});
|
text: firstText,
|
||||||
if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
|
model: modelKey,
|
||||||
throw new Error(`unexpected reply: ${reply}`);
|
phase: "tool-only",
|
||||||
}
|
label: params.label,
|
||||||
}
|
});
|
||||||
|
|
||||||
logProgress(`${progressLabel}: done`);
|
const second = await client.request<AgentFinalPayload>(
|
||||||
break;
|
"agent",
|
||||||
|
{
|
||||||
|
sessionKey,
|
||||||
|
idempotencyKey: `idem-${runId2}-2`,
|
||||||
|
message: `Now answer: what are the values of nonceA and nonceB in "${toolProbePath}"? Reply with exactly: ${nonceA} ${nonceB}.`,
|
||||||
|
thinking: params.thinkingLevel,
|
||||||
|
deliver: false,
|
||||||
|
},
|
||||||
|
{ expectFinal: true },
|
||||||
|
);
|
||||||
|
if (second?.status !== "ok") {
|
||||||
|
throw new Error(
|
||||||
|
`post-tool message failed: status=${String(second?.status)}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
const reply = extractPayloadText(second?.result);
|
||||||
|
assertNoReasoningTags({
|
||||||
|
text: reply,
|
||||||
|
model: modelKey,
|
||||||
|
phase: "tool-only-followup",
|
||||||
|
label: params.label,
|
||||||
|
});
|
||||||
|
if (!reply.includes(nonceA) || !reply.includes(nonceB)) {
|
||||||
|
throw new Error(`unexpected reply: ${reply}`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logProgress(`${progressLabel}: done`);
|
||||||
|
break;
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
const message = String(err);
|
const message = String(err);
|
||||||
if (
|
if (
|
||||||
@@ -686,7 +695,6 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
|||||||
|
|
||||||
const candidates: Array<Model<Api>> = [];
|
const candidates: Array<Model<Api>> = [];
|
||||||
for (const model of wanted) {
|
for (const model of wanted) {
|
||||||
const id = `${model.provider}/${model.id}`;
|
|
||||||
if (PROVIDERS && !PROVIDERS.has(model.provider)) continue;
|
if (PROVIDERS && !PROVIDERS.has(model.provider)) continue;
|
||||||
try {
|
try {
|
||||||
// eslint-disable-next-line no-await-in-loop
|
// eslint-disable-next-line no-await-in-loop
|
||||||
@@ -721,9 +729,13 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
|||||||
thinkingLevel: THINKING_LEVEL,
|
thinkingLevel: THINKING_LEVEL,
|
||||||
});
|
});
|
||||||
|
|
||||||
const minimaxCandidates = candidates.filter((model) => model.provider === "minimax");
|
const minimaxCandidates = candidates.filter(
|
||||||
|
(model) => model.provider === "minimax",
|
||||||
|
);
|
||||||
if (minimaxCandidates.length === 0) {
|
if (minimaxCandidates.length === 0) {
|
||||||
logProgress("[minimax] no candidates with keys; skipping dual endpoint probes");
|
logProgress(
|
||||||
|
"[minimax] no candidates with keys; skipping dual endpoint probes",
|
||||||
|
);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -743,7 +755,9 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
|||||||
providerOverrides: { minimax: minimaxOpenAi },
|
providerOverrides: { minimax: minimaxOpenAi },
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
logProgress("[minimax-openai] missing minimax provider config; skipping");
|
logProgress(
|
||||||
|
"[minimax-openai] missing minimax provider config; skipping",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
const minimaxAnthropic = buildMinimaxProviderOverride({
|
const minimaxAnthropic = buildMinimaxProviderOverride({
|
||||||
@@ -762,7 +776,9 @@ describeLive("gateway live (dev agent, profile keys)", () => {
|
|||||||
providerOverrides: { minimax: minimaxAnthropic },
|
providerOverrides: { minimax: minimaxAnthropic },
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
logProgress("[minimax-anthropic] missing minimax provider config; skipping");
|
logProgress(
|
||||||
|
"[minimax-anthropic] missing minimax provider config; skipping",
|
||||||
|
);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
20 * 60 * 1000,
|
20 * 60 * 1000,
|
||||||
|
|||||||
@@ -18,10 +18,7 @@ function loadProfileEnv(): void {
|
|||||||
try {
|
try {
|
||||||
const output = execFileSync(
|
const output = execFileSync(
|
||||||
"/bin/bash",
|
"/bin/bash",
|
||||||
[
|
["-lc", `set -a; source "${profilePath}" >/dev/null 2>&1; env -0`],
|
||||||
"-lc",
|
|
||||||
`set -a; source \"${profilePath}\" >/dev/null 2>&1; env -0`,
|
|
||||||
],
|
|
||||||
{ encoding: "utf8" },
|
{ encoding: "utf8" },
|
||||||
);
|
);
|
||||||
const entries = output.split("\0");
|
const entries = output.split("\0");
|
||||||
|
|||||||
Reference in New Issue
Block a user