fix(providers): disable Bedrock prompt caching for non-Anthropic models (#20866) (thanks @pierreeurope)

This commit is contained in:
Peter Steinberger
2026-02-23 18:16:18 +00:00
parent e40ee3c2c7
commit ca5c0bc02b
3 changed files with 56 additions and 0 deletions

View File

@@ -151,6 +151,42 @@ describe("applyExtraParamsToAgent", () => {
});
});
it("disables prompt caching for non-Anthropic Bedrock models", () => {
  // The model id carries no Anthropic marker, so the call is expected to go out
  // with caching explicitly switched off.
  const providerId = "amazon-bedrock";
  const modelId = "amazon.nova-micro-v1";

  // NOTE(review): the helper presumably records the options each stream call
  // receives in `calls` — confirm against its definition.
  const capture = createOptionsCaptureAgent();
  applyExtraParamsToAgent(capture.agent, undefined, providerId, modelId);

  const context: Context = { messages: [] };
  const model = {
    api: "openai-completions",
    provider: providerId,
    id: modelId,
  } as Model<"openai-completions">;

  // Fire-and-forget: only the captured options matter, not the stream result.
  void capture.agent.streamFn?.(model, context, {});

  expect(capture.calls).toHaveLength(1);
  expect(capture.calls[0]?.cacheRetention).toBe("none");
});
it("keeps Anthropic Bedrock models eligible for provider-side caching", () => {
  // A region-prefixed Anthropic model id: the call is expected to go out with
  // `cacheRetention` left unset rather than forced to "none".
  const providerId = "amazon-bedrock";
  const modelId = "us.anthropic.claude-sonnet-4-5";

  // NOTE(review): the helper presumably records the options each stream call
  // receives in `calls` — confirm against its definition.
  const capture = createOptionsCaptureAgent();
  applyExtraParamsToAgent(capture.agent, undefined, providerId, modelId);

  const context: Context = { messages: [] };
  const model = {
    api: "openai-completions",
    provider: providerId,
    id: modelId,
  } as Model<"openai-completions">;

  // Fire-and-forget: only the captured options matter, not the stream result.
  void capture.agent.streamFn?.(model, context, {});

  expect(capture.calls).toHaveLength(1);
  expect(capture.calls[0]?.cacheRetention).toBeUndefined();
});
it("adds Anthropic 1M beta header when context1m is enabled for Opus/Sonnet", () => {
const { calls, agent } = createOptionsCaptureAgent();
const cfg = buildAnthropicModelConfig("anthropic/claude-opus-4-6", { context1m: true });

View File

@@ -137,6 +137,20 @@ function createStreamFnWithExtraParams(
return wrappedStreamFn;
}
/**
 * Reports whether a model identifier names an Anthropic Claude model.
 *
 * The check is a case-insensitive substring search, so identifiers that carry
 * a prefix (for example `us.anthropic.claude-sonnet-4-5`) match as well.
 *
 * @param modelId - Model identifier to inspect.
 * @returns `true` when the identifier contains `anthropic.claude` or
 *   `anthropic/claude`, ignoring case; `false` otherwise.
 */
function isAnthropicBedrockModel(modelId: string): boolean {
  const lowered = modelId.toLowerCase();
  for (const marker of ["anthropic.claude", "anthropic/claude"]) {
    if (lowered.includes(marker)) {
      return true;
    }
  }
  return false;
}
/**
 * Builds a stream function that always requests `cacheRetention: "none"`.
 *
 * @param baseStreamFn - Stream function to delegate to; `streamSimple` is used
 *   when this is `undefined`.
 * @returns A stream function that forwards `model` and `context` unchanged and
 *   passes a copy of `options` with `cacheRetention` set to `"none"`.
 */
function createBedrockNoCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const delegate = baseStreamFn ?? streamSimple;
  const streamWithoutCache: StreamFn = (model, context, options) => {
    // `cacheRetention` is listed after the spread so it overrides any value the
    // caller supplied in `options`.
    return delegate(model, context, { ...options, cacheRetention: "none" });
  };
  return streamWithoutCache;
}
function isDirectOpenAIBaseUrl(baseUrl: unknown): boolean {
if (typeof baseUrl !== "string" || !baseUrl.trim()) {
return true;
@@ -501,6 +515,11 @@ export function applyExtraParamsToAgent(
agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
}
if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) {
log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`);
agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn);
}
// Enable Z.AI tool_stream for real-time tool call streaming.
// Enabled by default for Z.AI provider, can be disabled via params.tool_stream: false
if (provider === "zai" || provider === "z-ai") {