fix: sanitize Gemini 3.1 Google reasoning payloads
This commit is contained in:
@@ -28,6 +28,7 @@ Docs: https://docs.openclaw.ai
- Security/Hooks: normalize hook session-key classification with trim/lowercase plus Unicode NFKC folding (for example full-width `HOOK:...`) so external-content wrapping cannot be bypassed by mixed-case or lookalike prefixes. (#25750) Thanks @bmendonca3.
- Security/Voice Call: add Telnyx webhook replay detection and canonicalize replay-key signature encoding (Base64/Base64URL equivalent forms dedupe together), so duplicate signed webhook deliveries no longer re-trigger side effects. (#25832) Thanks @bmendonca3.
- Providers/OpenRouter/Auth profiles: bypass auth-profile cooldown/disable windows for OpenRouter, so provider failures no longer put OpenRouter profiles into local cooldown and stale legacy cooldown markers are ignored in fallback and status selection paths. (#25892) Thanks @alexanderatallah for raising this and @vincentkoc for the fix.
- Providers/Google reasoning: sanitize invalid negative `thinkingBudget` payloads for Gemini 3.1 requests by dropping `-1` budgets and mapping configured reasoning effort to `thinkingLevel`, preventing malformed reasoning payloads on `google-generative-ai`. (#25900)
- WhatsApp/Web reconnect: treat close status `440` as non-retryable (including string-form status values), stop reconnect loops immediately, and emit operator guidance to relink after resolving session conflicts. (#25858) Thanks @markmusson.
- WhatsApp/Reasoning safety: suppress outbound payloads marked as reasoning and hard-drop text payloads that begin with `Reasoning:` before WhatsApp delivery, preventing hidden thinking blocks from leaking to end users through final-message paths. (#25804, #25214, #24328)
- Onboarding/Telegram: keep core-channel onboarding available when plugin registry population is missing by falling back to built-in adapters and continuing wizard setup with actionable recovery guidance. (#25803) Thanks @Suko.
|
|||||||
@@ -6,9 +6,13 @@ import { isTruthyEnvValue } from "../infra/env.js";
|
|||||||
import { applyExtraParamsToAgent } from "./pi-embedded-runner.js";
|
import { applyExtraParamsToAgent } from "./pi-embedded-runner.js";
|
||||||
|
|
||||||
const OPENAI_KEY = process.env.OPENAI_API_KEY ?? "";
|
const OPENAI_KEY = process.env.OPENAI_API_KEY ?? "";
|
||||||
|
const GEMINI_KEY = process.env.GEMINI_API_KEY ?? "";
|
||||||
const LIVE = isTruthyEnvValue(process.env.OPENAI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE);
|
const LIVE = isTruthyEnvValue(process.env.OPENAI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE);
|
||||||
|
const GEMINI_LIVE =
|
||||||
|
isTruthyEnvValue(process.env.GEMINI_LIVE_TEST) || isTruthyEnvValue(process.env.LIVE);
|
||||||
|
|
||||||
const describeLive = LIVE && OPENAI_KEY ? describe : describe.skip;
|
const describeLive = LIVE && OPENAI_KEY ? describe : describe.skip;
|
||||||
|
const describeGeminiLive = GEMINI_LIVE && GEMINI_KEY ? describe : describe.skip;
|
||||||
|
|
||||||
describeLive("pi embedded extra params (live)", () => {
|
describeLive("pi embedded extra params (live)", () => {
|
||||||
it("applies config maxTokens to openai streamFn", async () => {
|
it("applies config maxTokens to openai streamFn", async () => {
|
||||||
@@ -62,3 +66,170 @@ describeLive("pi embedded extra params (live)", () => {
|
|||||||
expect(outputTokens ?? 0).toBeLessThanOrEqual(20);
|
expect(outputTokens ?? 0).toBeLessThanOrEqual(20);
|
||||||
}, 30_000);
|
}, 30_000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describeGeminiLive("pi embedded extra params (gemini live)", () => {
|
||||||
|
function isGoogleModelUnavailableError(raw: string | undefined): boolean {
|
||||||
|
const msg = (raw ?? "").toLowerCase();
|
||||||
|
if (!msg) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
msg.includes("not found") ||
|
||||||
|
msg.includes("404") ||
|
||||||
|
msg.includes("not_available") ||
|
||||||
|
msg.includes("permission denied") ||
|
||||||
|
msg.includes("unsupported model")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isGoogleImageProcessingError(raw: string | undefined): boolean {
|
||||||
|
const msg = (raw ?? "").toLowerCase();
|
||||||
|
if (!msg) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return (
|
||||||
|
msg.includes("unable to process input image") ||
|
||||||
|
msg.includes("invalid_argument") ||
|
||||||
|
msg.includes("bad request")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
async function runGeminiProbe(params: {
|
||||||
|
agentStreamFn: typeof streamSimple;
|
||||||
|
model: Model<"google-generative-ai">;
|
||||||
|
apiKey: string;
|
||||||
|
oneByOneRedPngBase64: string;
|
||||||
|
includeImage?: boolean;
|
||||||
|
prompt: string;
|
||||||
|
onPayload?: (payload: Record<string, unknown>) => void;
|
||||||
|
}): Promise<{ sawDone: boolean; stopReason?: string; errorMessage?: string }> {
|
||||||
|
const userContent: Array<
|
||||||
|
{ type: "text"; text: string } | { type: "image"; mimeType: string; data: string }
|
||||||
|
> = [{ type: "text", text: params.prompt }];
|
||||||
|
if (params.includeImage ?? true) {
|
||||||
|
userContent.push({
|
||||||
|
type: "image",
|
||||||
|
mimeType: "image/png",
|
||||||
|
data: params.oneByOneRedPngBase64,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const stream = params.agentStreamFn(
|
||||||
|
params.model,
|
||||||
|
{
|
||||||
|
messages: [
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
content: userContent,
|
||||||
|
timestamp: Date.now(),
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
apiKey: params.apiKey,
|
||||||
|
reasoning: "high",
|
||||||
|
maxTokens: 64,
|
||||||
|
onPayload: (payload) => {
|
||||||
|
params.onPayload?.(payload as Record<string, unknown>);
|
||||||
|
},
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
let sawDone = false;
|
||||||
|
let stopReason: string | undefined;
|
||||||
|
let errorMessage: string | undefined;
|
||||||
|
|
||||||
|
for await (const event of stream) {
|
||||||
|
if (event.type === "done") {
|
||||||
|
sawDone = true;
|
||||||
|
stopReason = event.reason;
|
||||||
|
} else if (event.type === "error") {
|
||||||
|
stopReason = event.reason;
|
||||||
|
errorMessage = event.error?.errorMessage;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { sawDone, stopReason, errorMessage };
|
||||||
|
}
|
||||||
|
|
||||||
|
it("sanitizes Gemini 3.1 thinking payload and keeps image parts with reasoning enabled", async () => {
|
||||||
|
const model = getModel(
|
||||||
|
"google",
|
||||||
|
"gemini-3.1-pro-preview",
|
||||||
|
) as unknown as Model<"google-generative-ai">;
|
||||||
|
|
||||||
|
const agent = { streamFn: streamSimple };
|
||||||
|
applyExtraParamsToAgent(agent, undefined, "google", model.id, undefined, "high");
|
||||||
|
|
||||||
|
const oneByOneRedPngBase64 =
|
||||||
|
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR4nGP4zwAAAgIBAJBzWgkAAAAASUVORK5CYII=";
|
||||||
|
|
||||||
|
let capturedPayload: Record<string, unknown> | undefined;
|
||||||
|
const imageResult = await runGeminiProbe({
|
||||||
|
agentStreamFn: agent.streamFn,
|
||||||
|
model,
|
||||||
|
apiKey: GEMINI_KEY,
|
||||||
|
oneByOneRedPngBase64,
|
||||||
|
includeImage: true,
|
||||||
|
prompt: "What color is this image? Reply with one word.",
|
||||||
|
onPayload: (payload) => {
|
||||||
|
capturedPayload = payload;
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
expect(capturedPayload).toBeDefined();
|
||||||
|
const thinkingConfig = (
|
||||||
|
capturedPayload?.config as { thinkingConfig?: Record<string, unknown> } | undefined
|
||||||
|
)?.thinkingConfig;
|
||||||
|
expect(thinkingConfig?.thinkingBudget).toBeUndefined();
|
||||||
|
expect(thinkingConfig?.thinkingLevel).toBe("HIGH");
|
||||||
|
|
||||||
|
const imagePart = (
|
||||||
|
capturedPayload?.contents as
|
||||||
|
| Array<{ parts?: Array<{ inlineData?: { mimeType?: string; data?: string } }> }>
|
||||||
|
| undefined
|
||||||
|
)?.[0]?.parts?.find((part) => part.inlineData !== undefined)?.inlineData;
|
||||||
|
expect(imagePart).toEqual({
|
||||||
|
mimeType: "image/png",
|
||||||
|
data: oneByOneRedPngBase64,
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!imageResult.sawDone && !isGoogleModelUnavailableError(imageResult.errorMessage)) {
|
||||||
|
expect(isGoogleImageProcessingError(imageResult.errorMessage)).toBe(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
const textResult = await runGeminiProbe({
|
||||||
|
agentStreamFn: agent.streamFn,
|
||||||
|
model,
|
||||||
|
apiKey: GEMINI_KEY,
|
||||||
|
oneByOneRedPngBase64,
|
||||||
|
includeImage: false,
|
||||||
|
prompt: "Reply with exactly OK.",
|
||||||
|
});
|
||||||
|
|
||||||
|
if (!textResult.sawDone && isGoogleModelUnavailableError(textResult.errorMessage)) {
|
||||||
|
// Some keys/regions do not expose Gemini 3.1 preview. Fall back to a
|
||||||
|
// stable model to keep live reasoning verification active.
|
||||||
|
const fallbackModel = getModel(
|
||||||
|
"google",
|
||||||
|
"gemini-2.5-pro",
|
||||||
|
) as unknown as Model<"google-generative-ai">;
|
||||||
|
const fallback = await runGeminiProbe({
|
||||||
|
agentStreamFn: agent.streamFn,
|
||||||
|
model: fallbackModel,
|
||||||
|
apiKey: GEMINI_KEY,
|
||||||
|
oneByOneRedPngBase64,
|
||||||
|
includeImage: false,
|
||||||
|
prompt: "Reply with exactly OK.",
|
||||||
|
});
|
||||||
|
expect(fallback.sawDone).toBe(true);
|
||||||
|
expect(fallback.stopReason).toBeDefined();
|
||||||
|
expect(fallback.stopReason).not.toBe("error");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
expect(textResult.sawDone).toBe(true);
|
||||||
|
expect(textResult.stopReason).toBeDefined();
|
||||||
|
expect(textResult.stopReason).not.toBe("error");
|
||||||
|
}, 45_000);
|
||||||
|
});
|
||||||
|
|||||||
@@ -372,6 +372,102 @@ describe("applyExtraParamsToAgent", () => {
|
|||||||
expect(payloads[0]?.thinking).toBe("off");
|
expect(payloads[0]?.thinking).toBe("off");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("removes invalid negative Google thinkingBudget and maps Gemini 3.1 to thinkingLevel", () => {
|
||||||
|
const payloads: Record<string, unknown>[] = [];
|
||||||
|
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||||
|
const payload: Record<string, unknown> = {
|
||||||
|
contents: [
|
||||||
|
{
|
||||||
|
role: "user",
|
||||||
|
parts: [
|
||||||
|
{ text: "describe image" },
|
||||||
|
{
|
||||||
|
inlineData: {
|
||||||
|
mimeType: "image/png",
|
||||||
|
data: "ZmFrZQ==",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
config: {
|
||||||
|
thinkingConfig: {
|
||||||
|
includeThoughts: true,
|
||||||
|
thinkingBudget: -1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
options?.onPayload?.(payload);
|
||||||
|
payloads.push(payload);
|
||||||
|
return {} as ReturnType<StreamFn>;
|
||||||
|
};
|
||||||
|
const agent = { streamFn: baseStreamFn };
|
||||||
|
|
||||||
|
applyExtraParamsToAgent(agent, undefined, "atproxy", "gemini-3.1-pro-high", undefined, "high");
|
||||||
|
|
||||||
|
const model = {
|
||||||
|
api: "google-generative-ai",
|
||||||
|
provider: "atproxy",
|
||||||
|
id: "gemini-3.1-pro-high",
|
||||||
|
} as Model<"google-generative-ai">;
|
||||||
|
const context: Context = { messages: [] };
|
||||||
|
void agent.streamFn?.(model, context, {});
|
||||||
|
|
||||||
|
expect(payloads).toHaveLength(1);
|
||||||
|
const thinkingConfig = (
|
||||||
|
payloads[0]?.config as { thinkingConfig?: Record<string, unknown> } | undefined
|
||||||
|
)?.thinkingConfig;
|
||||||
|
expect(thinkingConfig).toEqual({
|
||||||
|
includeThoughts: true,
|
||||||
|
thinkingLevel: "HIGH",
|
||||||
|
});
|
||||||
|
expect(
|
||||||
|
(
|
||||||
|
payloads[0]?.contents as
|
||||||
|
| Array<{ parts?: Array<{ inlineData?: { mimeType?: string; data?: string } }> }>
|
||||||
|
| undefined
|
||||||
|
)?.[0]?.parts?.[1]?.inlineData,
|
||||||
|
).toEqual({
|
||||||
|
mimeType: "image/png",
|
||||||
|
data: "ZmFrZQ==",
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
it("keeps valid Google thinkingBudget unchanged", () => {
|
||||||
|
const payloads: Record<string, unknown>[] = [];
|
||||||
|
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||||
|
const payload: Record<string, unknown> = {
|
||||||
|
config: {
|
||||||
|
thinkingConfig: {
|
||||||
|
includeThoughts: true,
|
||||||
|
thinkingBudget: 2048,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
options?.onPayload?.(payload);
|
||||||
|
payloads.push(payload);
|
||||||
|
return {} as ReturnType<StreamFn>;
|
||||||
|
};
|
||||||
|
const agent = { streamFn: baseStreamFn };
|
||||||
|
|
||||||
|
applyExtraParamsToAgent(agent, undefined, "atproxy", "gemini-3.1-pro-high", undefined, "high");
|
||||||
|
|
||||||
|
const model = {
|
||||||
|
api: "google-generative-ai",
|
||||||
|
provider: "atproxy",
|
||||||
|
id: "gemini-3.1-pro-high",
|
||||||
|
} as Model<"google-generative-ai">;
|
||||||
|
const context: Context = { messages: [] };
|
||||||
|
void agent.streamFn?.(model, context, {});
|
||||||
|
|
||||||
|
expect(payloads).toHaveLength(1);
|
||||||
|
expect(payloads[0]?.config).toEqual({
|
||||||
|
thinkingConfig: {
|
||||||
|
includeThoughts: true,
|
||||||
|
thinkingBudget: 2048,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
});
|
||||||
it("adds OpenRouter attribution headers to stream options", () => {
|
it("adds OpenRouter attribution headers to stream options", () => {
|
||||||
const { calls, agent } = createOptionsCaptureAgent();
|
const { calls, agent } = createOptionsCaptureAgent();
|
||||||
|
|
||||||
|
|||||||
@@ -504,6 +504,94 @@ function createOpenRouterWrapper(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function isGemini31Model(modelId: string): boolean {
|
||||||
|
const normalized = modelId.toLowerCase();
|
||||||
|
return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash");
|
||||||
|
}
|
||||||
|
|
||||||
|
function mapThinkLevelToGoogleThinkingLevel(
|
||||||
|
thinkingLevel: ThinkLevel,
|
||||||
|
): "MINIMAL" | "LOW" | "MEDIUM" | "HIGH" | undefined {
|
||||||
|
switch (thinkingLevel) {
|
||||||
|
case "minimal":
|
||||||
|
return "MINIMAL";
|
||||||
|
case "low":
|
||||||
|
return "LOW";
|
||||||
|
case "medium":
|
||||||
|
return "MEDIUM";
|
||||||
|
case "high":
|
||||||
|
case "xhigh":
|
||||||
|
return "HIGH";
|
||||||
|
default:
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function sanitizeGoogleThinkingPayload(params: {
|
||||||
|
payload: unknown;
|
||||||
|
modelId?: string;
|
||||||
|
thinkingLevel?: ThinkLevel;
|
||||||
|
}): void {
|
||||||
|
if (!params.payload || typeof params.payload !== "object") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const payloadObj = params.payload as Record<string, unknown>;
|
||||||
|
const config = payloadObj.config;
|
||||||
|
if (!config || typeof config !== "object") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const configObj = config as Record<string, unknown>;
|
||||||
|
const thinkingConfig = configObj.thinkingConfig;
|
||||||
|
if (!thinkingConfig || typeof thinkingConfig !== "object") {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const thinkingConfigObj = thinkingConfig as Record<string, unknown>;
|
||||||
|
const thinkingBudget = thinkingConfigObj.thinkingBudget;
|
||||||
|
if (typeof thinkingBudget !== "number" || thinkingBudget >= 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// pi-ai can emit thinkingBudget=-1 for some Gemini 3.1 IDs; a negative budget
|
||||||
|
// is invalid for Google-compatible backends and can lead to malformed handling.
|
||||||
|
delete thinkingConfigObj.thinkingBudget;
|
||||||
|
|
||||||
|
if (
|
||||||
|
typeof params.modelId === "string" &&
|
||||||
|
isGemini31Model(params.modelId) &&
|
||||||
|
params.thinkingLevel &&
|
||||||
|
params.thinkingLevel !== "off" &&
|
||||||
|
thinkingConfigObj.thinkingLevel === undefined
|
||||||
|
) {
|
||||||
|
const mappedLevel = mapThinkLevelToGoogleThinkingLevel(params.thinkingLevel);
|
||||||
|
if (mappedLevel) {
|
||||||
|
thinkingConfigObj.thinkingLevel = mappedLevel;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
function createGoogleThinkingPayloadWrapper(
|
||||||
|
baseStreamFn: StreamFn | undefined,
|
||||||
|
thinkingLevel?: ThinkLevel,
|
||||||
|
): StreamFn {
|
||||||
|
const underlying = baseStreamFn ?? streamSimple;
|
||||||
|
return (model, context, options) => {
|
||||||
|
const onPayload = options?.onPayload;
|
||||||
|
return underlying(model, context, {
|
||||||
|
...options,
|
||||||
|
onPayload: (payload) => {
|
||||||
|
if (model.api === "google-generative-ai") {
|
||||||
|
sanitizeGoogleThinkingPayload({
|
||||||
|
payload,
|
||||||
|
modelId: model.id,
|
||||||
|
thinkingLevel,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
onPayload?.(payload);
|
||||||
|
},
|
||||||
|
});
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a streamFn wrapper that injects tool_stream=true for Z.AI providers.
|
* Create a streamFn wrapper that injects tool_stream=true for Z.AI providers.
|
||||||
*
|
*
|
||||||
@@ -615,6 +703,10 @@ export function applyExtraParamsToAgent(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Guard Google payloads against invalid negative thinking budgets emitted by
|
||||||
|
// upstream model-ID heuristics for Gemini 3.1 variants.
|
||||||
|
agent.streamFn = createGoogleThinkingPayloadWrapper(agent.streamFn, thinkingLevel);
|
||||||
|
|
||||||
// Work around upstream pi-ai hardcoding `store: false` for Responses API.
|
// Work around upstream pi-ai hardcoding `store: false` for Responses API.
|
||||||
// Force `store=true` for direct OpenAI/OpenAI Codex providers so multi-turn
|
// Force `store=true` for direct OpenAI/OpenAI Codex providers so multi-turn
|
||||||
// server-side conversation state is preserved.
|
// server-side conversation state is preserved.
|
||||||
|
|||||||
Reference in New Issue
Block a user