From ea3b7dfde59607876e7ba41afdc8da44d0982b0b Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Mon, 2 Mar 2026 23:48:00 +0000 Subject: [PATCH] fix(channels): normalize MIME kind parsing and reaction fallbacks --- CHANGELOG.md | 2 + src/channels/plugins/actions/actions.test.ts | 37 +++++++++++++++++++ .../plugins/actions/discord/handle-action.ts | 9 ++++- src/imessage/monitor/monitor-provider.ts | 4 +- src/imessage/send.test.ts | 13 +++++++ src/imessage/send.ts | 4 +- .../event-handler.mention-gating.test.ts | 25 +++++++++++++ src/signal/monitor/event-handler.ts | 6 +-- src/signal/send.ts | 4 +- src/telegram/bot/delivery.replies.ts | 5 +-- src/telegram/send.test.ts | 10 +++++ src/telegram/send.ts | 8 ++-- src/web/media.ts | 8 ++-- 13 files changed, 114 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e431f59b4..c846d8f4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,7 @@ Docs: https://docs.openclaw.ai - Sandbox/workspace mount permissions: make primary `/workspace` bind mounts read-only whenever `workspaceAccess` is not `rw` (including `none`) across both core sandbox container and sandbox browser create flows. (#32227) Thanks @guanyu-zhang. - Security audit/skills workspace hardening: add `skills.workspace.symlink_escape` warning in `openclaw security audit` when workspace `skills/**/SKILL.md` resolves outside the workspace root (for example symlink-chain drift), plus docs coverage in the security glossary. - Signal/message actions: allow `react` to fall back to `toolContext.currentMessageId` when `messageId` is omitted, matching Telegram behavior and unblocking agent-initiated reactions on inbound turns. (#32217) Thanks @dunamismax. +- Discord/message actions: allow `react` to fall back to `toolContext.currentMessageId` when `messageId` is omitted, matching Telegram/Signal reaction ergonomics in inbound turns. - Gateway/OpenAI chat completions: honor `x-openclaw-message-channel` when building `agentCommand` input for `/v1/chat/completions`, preserving caller channel identity instead of forcing `webchat`. (#30462) Thanks @bmendonca3. - Secrets/exec resolver timeout defaults: use provider `timeoutMs` as the default inactivity (`noOutputTimeoutMs`) watchdog for exec secret providers, preventing premature no-output kills for resolvers that start producing output after 2s. (#32235) Thanks @bmendonca3. - Feishu/File upload filenames: percent-encode non-ASCII/special-character `file_name` values in Feishu multipart uploads so Chinese/symbol-heavy filenames are sent as proper attachments instead of plain text links. (#31179) Thanks @Kay-051. @@ -44,6 +45,7 @@ Docs: https://docs.openclaw.ai - Plugin SDK/runtime hardening: add package export verification in CI/release checks to catch missing runtime exports before publish-time regressions. (#28575) Thanks @Glucksberg. - Media understanding/provider HTTP proxy routing: pass a proxy-aware fetch function from `HTTPS_PROXY`/`HTTP_PROXY` env vars into audio/video provider calls (with graceful malformed-proxy fallback) so transcription/video requests honor configured outbound proxies. (#27093) Thanks @mcaxtr. - Media/MIME normalization: normalize parameterized/case-variant MIME strings in `kindFromMime` (for example `Audio/Ogg; codecs=opus`) so WhatsApp voice notes are classified as audio and routed through transcription correctly. (#32280) Thanks @Lucenx9. +- Media/MIME channel parity: route Telegram/Signal/iMessage media-kind checks through normalized `kindFromMime` so mixed-case/parameterized MIME values classify consistently across message channels. - Media understanding/malformed attachment guards: harden attachment selection and decision summary formatting against non-array or malformed attachment payloads to prevent runtime crashes on invalid inbound metadata shapes. (#28024) Thanks @claw9267. - Media understanding/parakeet CLI output parsing: read `parakeet-mlx` transcripts from `--output-dir/.txt` when txt output is requested (or default), with stdout fallback for non-txt formats. (#9177) Thanks @mac-110. - Media understanding/audio transcription guard: skip tiny/empty audio files (<1024 bytes) before provider/CLI transcription to avoid noisy invalid-audio failures and preserve clean fallback behavior. (#8388) Thanks @Glucksberg. diff --git a/src/channels/plugins/actions/actions.test.ts b/src/channels/plugins/actions/actions.test.ts index e82dce0a7..bd0454bf7 100644 --- a/src/channels/plugins/actions/actions.test.ts +++ b/src/channels/plugins/actions/actions.test.ts @@ -456,6 +456,43 @@ describe("handleDiscordMessageAction", () => { expect.objectContaining({ mediaLocalRoots: ["/tmp/agent-root"] }), ); }); + + it("falls back to toolContext.currentMessageId for reactions when messageId is omitted", async () => { + await handleDiscordMessageAction({ + action: "react", + params: { + channelId: "123", + emoji: "ok", + }, + cfg: {} as OpenClawConfig, + toolContext: { currentMessageId: "9001" }, + }); + + const call = handleDiscordAction.mock.calls.at(-1); + expect(call?.[0]).toEqual( + expect.objectContaining({ + action: "react", + channelId: "123", + messageId: "9001", + emoji: "ok", + }), + ); + }); + + it("rejects reactions when neither messageId nor toolContext.currentMessageId is provided", async () => { + await expect( + handleDiscordMessageAction({ + action: "react", + params: { + channelId: "123", + emoji: "ok", + }, + cfg: {} as OpenClawConfig, + }), + ).rejects.toThrow(/messageId required/i); + + expect(handleDiscordAction).not.toHaveBeenCalled(); + }); }); describe("telegramMessageActions", () => { diff --git a/src/channels/plugins/actions/discord/handle-action.ts b/src/channels/plugins/actions/discord/handle-action.ts index c0f3dc01a..6f0a701b6 100644 --- a/src/channels/plugins/actions/discord/handle-action.ts +++ b/src/channels/plugins/actions/discord/handle-action.ts @@ -8,6 +8,7 @@ import { readDiscordParentIdParam } from "../../../../agents/tools/discord-actio import { handleDiscordAction } from "../../../../agents/tools/discord-actions.js"; import { resolveDiscordChannelId } from "../../../../discord/targets.js"; import type { ChannelMessageActionContext } from "../../types.js"; +import { resolveReactionMessageId } from "../reaction-message-id.js"; import { tryHandleDiscordMessageActionGuildAdmin } from "./handle-action.guild-admin.js"; const providerId = "discord"; @@ -107,7 +108,13 @@ export async function handleDiscordMessageAction( } if (action === "react") { - const messageId = readStringParam(params, "messageId", { required: true }); + const messageIdRaw = resolveReactionMessageId({ args: params, toolContext: ctx.toolContext }); + const messageId = messageIdRaw != null ? String(messageIdRaw).trim() : ""; + if (!messageId) { + throw new Error( + "messageId required. Provide messageId explicitly or react to the current inbound message.", + ); + } const emoji = readStringParam(params, "emoji", { allowEmpty: true }); const remove = typeof params.remove === "boolean" ? params.remove : undefined; return await handleDiscordAction( diff --git a/src/imessage/monitor/monitor-provider.ts b/src/imessage/monitor/monitor-provider.ts index 13bb6bac8..8a7b62d5c 100644 --- a/src/imessage/monitor/monitor-provider.ts +++ b/src/imessage/monitor/monitor-provider.ts @@ -25,12 +25,12 @@ import { readSessionUpdatedAt, resolveStorePath } from "../../config/sessions.js import { danger, logVerbose, shouldLogVerbose, warn } from "../../globals.js"; import { normalizeScpRemoteHost } from "../../infra/scp-host.js"; import { waitForTransportReady } from "../../infra/transport-ready.js"; -import { mediaKindFromMime } from "../../media/constants.js"; import { isInboundPathAllowed, resolveIMessageAttachmentRoots, resolveIMessageRemoteAttachmentRoots, } from "../../media/inbound-path-policy.js"; +import { kindFromMime } from "../../media/mime.js"; import { buildPairingReply } from "../../pairing/pairing-messages.js"; import { readChannelAllowFromStore, @@ -224,7 +224,7 @@ export async function monitorIMessageProvider(opts: MonitorIMessageOpts = {}): P // Build arrays for all attachments (for multi-image support) const mediaPaths = validAttachments.map((a) => a.original_path).filter(Boolean) as string[]; const mediaTypes = validAttachments.map((a) => a.mime_type ?? undefined); - const kind = mediaKindFromMime(mediaType ?? undefined); + const kind = kindFromMime(mediaType ?? undefined); const placeholder = kind ? `` : validAttachments.length diff --git a/src/imessage/send.test.ts b/src/imessage/send.test.ts index 7552b4782..5d0987e60 100644 --- a/src/imessage/send.test.ts +++ b/src/imessage/send.test.ts @@ -71,6 +71,19 @@ describe("sendMessageIMessage", () => { expect(params.text).toBe(""); }); + it("normalizes mixed-case parameterized MIME for attachment placeholder text", async () => { + await sendWithDefaults("chat_id:7", "", { + mediaUrl: "http://x/voice", + resolveAttachmentImpl: async () => ({ + path: "/tmp/imessage-media.ogg", + contentType: " Audio/Ogg; codecs=opus ", + }), + }); + const params = getSentParams(); + expect(params.file).toBe("/tmp/imessage-media.ogg"); + expect(params.text).toBe(""); + }); + it("returns message id when rpc provides one", async () => { requestMock.mockResolvedValue({ ok: true, id: 123 }); const result = await sendWithDefaults("chat_id:7", "hello"); diff --git a/src/imessage/send.ts b/src/imessage/send.ts index 7c3345b75..efa3fca33 100644 --- a/src/imessage/send.ts +++ b/src/imessage/send.ts @@ -1,7 +1,7 @@ import { loadConfig } from "../config/config.js"; import { resolveMarkdownTableMode } from "../config/markdown-tables.js"; import { convertMarkdownTables } from "../markdown/tables.js"; -import { mediaKindFromMime } from "../media/constants.js"; +import { kindFromMime } from "../media/mime.js"; import { resolveOutboundAttachmentFromUrl } from "../media/outbound-attachment.js"; import { resolveIMessageAccount, type ResolvedIMessageAccount } from "./accounts.js"; import { createIMessageRpcClient, type IMessageRpcClient } from "./client.js"; @@ -129,7 +129,7 @@ export async function sendMessageIMessage( }); filePath = resolved.path; if (!message.trim()) { - const kind = mediaKindFromMime(resolved.contentType ?? undefined); + const kind = kindFromMime(resolved.contentType ?? undefined); if (kind) { message = kind === "image" ? "" : ``; } diff --git a/src/signal/monitor/event-handler.mention-gating.test.ts b/src/signal/monitor/event-handler.mention-gating.test.ts index b57625a44..403f36c1a 100644 --- a/src/signal/monitor/event-handler.mention-gating.test.ts +++ b/src/signal/monitor/event-handler.mention-gating.test.ts @@ -146,6 +146,31 @@ describe("signal mention gating", () => { ); }); + it("normalizes mixed-case parameterized attachment MIME in skipped pending history", async () => { + capturedCtx = undefined; + const groupHistories = new Map(); + const handler = createSignalEventHandler( + createBaseSignalEventHandlerDeps({ + cfg: createSignalConfig({ requireMention: true }), + historyLimit: 5, + groupHistories, + ignoreAttachments: false, + }), + ); + + await handler( + makeGroupEvent({ + message: "", + attachments: [{ contentType: " Audio/Ogg; codecs=opus " }], + }), + ); + + expect(capturedCtx).toBeUndefined(); + const entries = groupHistories.get("g1"); + expect(entries).toHaveLength(1); + expect(entries[0].body).toBe(""); + }); + it("records quote text in pending history for skipped quote-only group messages", async () => { await expectSkippedGroupHistory({ message: "", quoteText: "quoted context" }, "quoted context"); }); diff --git a/src/signal/monitor/event-handler.ts b/src/signal/monitor/event-handler.ts index c94a7b77a..bb8bfce02 100644 --- a/src/signal/monitor/event-handler.ts +++ b/src/signal/monitor/event-handler.ts @@ -29,7 +29,7 @@ import { resolveChannelGroupRequireMention } from "../../config/group-policy.js" import { readSessionUpdatedAt, resolveStorePath } from "../../config/sessions.js"; import { danger, logVerbose, shouldLogVerbose } from "../../globals.js"; import { enqueueSystemEvent } from "../../infra/system-events.js"; -import { mediaKindFromMime } from "../../media/constants.js"; +import { kindFromMime } from "../../media/mime.js"; import { resolveAgentRoute } from "../../routing/resolve-route.js"; import { DM_GROUP_ACCESS_REASON, @@ -636,7 +636,7 @@ export function createSignalEventHandler(deps: SignalEventHandlerDeps) { return ""; } const firstContentType = dataMessage.attachments?.[0]?.contentType; - const pendingKind = mediaKindFromMime(firstContentType ?? undefined); + const pendingKind = kindFromMime(firstContentType ?? undefined); return pendingKind ? `` : ""; })(); const pendingBodyText = messageText || pendingPlaceholder || quoteText; @@ -679,7 +679,7 @@ export function createSignalEventHandler(deps: SignalEventHandlerDeps) { } } - const kind = mediaKindFromMime(mediaType ?? undefined); + const kind = kindFromMime(mediaType ?? undefined); if (kind) { placeholder = ``; } else if (dataMessage.attachments?.length) { diff --git a/src/signal/send.ts b/src/signal/send.ts index 9b73d7d86..8bcd385e2 100644 --- a/src/signal/send.ts +++ b/src/signal/send.ts @@ -1,6 +1,6 @@ import { loadConfig } from "../config/config.js"; import { resolveMarkdownTableMode } from "../config/markdown-tables.js"; -import { mediaKindFromMime } from "../media/constants.js"; +import { kindFromMime } from "../media/mime.js"; import { resolveOutboundAttachmentFromUrl } from "../media/outbound-attachment.js"; import { resolveSignalAccount } from "./accounts.js"; import { signalRpcRequest } from "./client.js"; @@ -130,7 +130,7 @@ export async function sendMessageSignal( localRoots: opts.mediaLocalRoots, }); attachments = [resolved.path]; - const kind = mediaKindFromMime(resolved.contentType ?? undefined); + const kind = kindFromMime(resolved.contentType ?? undefined); if (!message && kind) { // Avoid sending an empty body when only attachments exist. message = kind === "image" ? "" : ``; diff --git a/src/telegram/bot/delivery.replies.ts b/src/telegram/bot/delivery.replies.ts index 209b9bfb6..71d0a82f6 100644 --- a/src/telegram/bot/delivery.replies.ts +++ b/src/telegram/bot/delivery.replies.ts @@ -5,9 +5,8 @@ import type { ReplyToMode } from "../../config/config.js"; import type { MarkdownTableMode } from "../../config/types.base.js"; import { danger, logVerbose } from "../../globals.js"; import { formatErrorMessage } from "../../infra/errors.js"; -import { mediaKindFromMime } from "../../media/constants.js"; import { buildOutboundMediaLoadOptions } from "../../media/load-options.js"; -import { isGifMedia } from "../../media/mime.js"; +import { isGifMedia, kindFromMime } from "../../media/mime.js"; import type { RuntimeEnv } from "../../runtime.js"; import { loadWebMedia } from "../../web/media.js"; import type { TelegramInlineButtons } from "../button-types.js"; @@ -234,7 +233,7 @@ async function deliverMediaReply(params: { mediaUrl, buildOutboundMediaLoadOptions({ mediaLocalRoots: params.mediaLocalRoots }), ); - const kind = mediaKindFromMime(media.contentType ?? undefined); + const kind = kindFromMime(media.contentType ?? undefined); const isGif = isGifMedia({ contentType: media.contentType, fileName: media.fileName, diff --git a/src/telegram/send.test.ts b/src/telegram/send.test.ts index b589fdcf5..78a28cd39 100644 --- a/src/telegram/send.test.ts +++ b/src/telegram/send.test.ts @@ -872,6 +872,16 @@ describe("sendMessageTelegram", () => { expectedMethod: "sendVoice" as const, expectedOptions: { caption: "caption", parse_mode: "HTML" }, }, + { + name: "normalizes parameterized audio MIME with mixed casing", + chatId: "123", + text: "caption", + mediaUrl: "https://example.com/note", + contentType: " Audio/Ogg; codecs=opus ", + fileName: "note.ogg", + expectedMethod: "sendAudio" as const, + expectedOptions: { caption: "caption", parse_mode: "HTML" }, + }, ]; for (const testCase of cases) { diff --git a/src/telegram/send.ts b/src/telegram/send.ts index ae0d5b525..6fa007405 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -15,9 +15,9 @@ import { createTelegramRetryRunner } from "../infra/retry-policy.js"; import type { RetryConfig } from "../infra/retry.js"; import { redactSensitiveText } from "../logging/redact.js"; import { createSubsystemLogger } from "../logging/subsystem.js"; -import { mediaKindFromMime } from "../media/constants.js"; +import type { MediaKind } from "../media/constants.js"; import { buildOutboundMediaLoadOptions } from "../media/load-options.js"; -import { isGifMedia } from "../media/mime.js"; +import { isGifMedia, kindFromMime } from "../media/mime.js"; import { normalizePollInput, type PollInput } from "../polls.js"; import { loadWebMedia } from "../web/media.js"; import { type ResolvedTelegramAccount, resolveTelegramAccount } from "./accounts.js"; @@ -566,7 +566,7 @@ export async function sendMessageTelegram( mediaLocalRoots: opts.mediaLocalRoots, }), ); - const kind = mediaKindFromMime(media.contentType ?? undefined); + const kind = kindFromMime(media.contentType ?? undefined); const isGif = isGifMedia({ contentType: media.contentType, fileName: media.fileName, @@ -944,7 +944,7 @@ export async function editMessageTelegram( return { ok: true, messageId: String(messageId), chatId }; } -function inferFilename(kind: ReturnType) { +function inferFilename(kind: MediaKind) { switch (kind) { case "image": return "image.jpg"; diff --git a/src/web/media.ts b/src/web/media.ts index cccd88e71..1e0842bb7 100644 --- a/src/web/media.ts +++ b/src/web/media.ts @@ -4,7 +4,7 @@ import { fileURLToPath } from "node:url"; import { logVerbose, shouldLogVerbose } from "../globals.js"; import { SafeOpenError, readLocalFileSafely } from "../infra/fs-safe.js"; import type { SsrFPolicy } from "../infra/net/ssrf.js"; -import { type MediaKind, maxBytesForKind, mediaKindFromMime } from "../media/constants.js"; +import { type MediaKind, maxBytesForKind } from "../media/constants.js"; import { fetchRemoteMedia } from "../media/fetch.js"; import { convertHeicToJpeg, @@ -13,7 +13,7 @@ import { resizeToJpeg, } from "../media/image-ops.js"; import { getDefaultMediaLocalRoots } from "../media/local-roots.js"; -import { detectMime, extensionForMime } from "../media/mime.js"; +import { detectMime, extensionForMime, kindFromMime } from "../media/mime.js"; import { resolveUserPath } from "../utils.js"; export type WebMediaResult = { @@ -333,7 +333,7 @@ async function loadWebMediaInternal( : maxBytes; const fetched = await fetchRemoteMedia({ url: mediaUrl, maxBytes: fetchCap, ssrfPolicy }); const { buffer, contentType, fileName } = fetched; - const kind = mediaKindFromMime(contentType); + const kind = kindFromMime(contentType); return await clampAndFinalize({ buffer, contentType, kind, fileName }); } @@ -385,7 +385,7 @@ async function loadWebMediaInternal( } } const mime = await detectMime({ buffer: data, filePath: mediaUrl }); - const kind = mediaKindFromMime(mime); + const kind = kindFromMime(mime); let fileName = path.basename(mediaUrl) || undefined; if (fileName && !path.extname(fileName) && mime) { const ext = extensionForMime(mime);