From 1c9c01ff492cef882dba12a7a5e74dfa8a491f35 Mon Sep 17 00:00:00 2001 From: Shadow Date: Fri, 13 Feb 2026 12:33:45 -0600 Subject: [PATCH] Discord: refine voice message handling --- docs/channels/discord.md | 16 ++++++ src/agents/tools/discord-actions-messaging.ts | 21 +++++--- src/agents/tools/discord-actions.e2e.test.ts | 39 +++++++++++++++ src/cli/program/message/register.send.ts | 6 ++- src/discord/voice-message.ts | 50 ++++++++----------- 5 files changed, 93 insertions(+), 39 deletions(-) diff --git a/docs/channels/discord.md b/docs/channels/discord.md index c232a042f..358deeac2 100644 --- a/docs/channels/discord.md +++ b/docs/channels/discord.md @@ -393,6 +393,22 @@ Default gate behavior: | moderation | disabled | | presence | disabled | +## Voice messages + +Discord voice messages show a waveform preview and require OGG/Opus audio plus metadata. OpenClaw generates the waveform automatically, but it needs `ffmpeg` and `ffprobe` available on the gateway host to inspect and convert audio files. + +Requirements and constraints: + +- Provide a **local file path** (URLs are rejected). +- Omit text content (Discord does not allow text + voice message in the same payload). +- Any audio format is accepted; OpenClaw converts to OGG/Opus when needed. + +Example: + +```bash +message(action="send", channel="discord", target="channel:123", path="/path/to/audio.mp3", asVoice=true) +``` + ## Troubleshooting diff --git a/src/agents/tools/discord-actions-messaging.ts b/src/agents/tools/discord-actions-messaging.ts index 94b40731c..c650c27fa 100644 --- a/src/agents/tools/discord-actions-messaging.ts +++ b/src/agents/tools/discord-actions-messaging.ts @@ -229,21 +229,26 @@ export async function handleDiscordMessagingAction( throw new Error("Discord message sends are disabled."); } const to = readStringParam(params, "to", { required: true }); - const content = readStringParam(params, "content", { - required: true, - allowEmpty: true, - }); - const mediaUrl = readStringParam(params, "mediaUrl"); - const replyTo = readStringParam(params, "replyTo"); const asVoice = params.asVoice === true; const silent = params.silent === true; + const content = readStringParam(params, "content", { + required: !asVoice, + allowEmpty: true, + }); + const mediaUrl = + readStringParam(params, "mediaUrl", { trim: false }) ?? + readStringParam(params, "path", { trim: false }) ?? + readStringParam(params, "filePath", { trim: false }); + const replyTo = readStringParam(params, "replyTo"); const embeds = Array.isArray(params.embeds) && params.embeds.length > 0 ? params.embeds : undefined; // Handle voice message sending if (asVoice) { if (!mediaUrl) { - throw new Error("Voice messages require a media file path (mediaUrl)."); + throw new Error( + "Voice messages require a local media file path (mediaUrl, path, or filePath).", + ); } if (content && content.trim()) { throw new Error( @@ -263,7 +268,7 @@ export async function handleDiscordMessagingAction( return jsonResult({ ok: true, result, voiceMessage: true }); } - const result = await sendMessageDiscord(to, content, { + const result = await sendMessageDiscord(to, content ?? "", { ...(accountId ? { accountId } : {}), mediaUrl, replyTo, diff --git a/src/agents/tools/discord-actions.e2e.test.ts b/src/agents/tools/discord-actions.e2e.test.ts index 815e9a6c3..1452c0626 100644 --- a/src/agents/tools/discord-actions.e2e.test.ts +++ b/src/agents/tools/discord-actions.e2e.test.ts @@ -32,6 +32,7 @@ const removeOwnReactionsDiscord = vi.fn(async () => ({ removed: ["👍"] })); const removeReactionDiscord = vi.fn(async () => ({})); const searchMessagesDiscord = vi.fn(async () => ({})); const sendMessageDiscord = vi.fn(async () => ({})); +const sendVoiceMessageDiscord = vi.fn(async () => ({})); const sendPollDiscord = vi.fn(async () => ({})); const sendStickerDiscord = vi.fn(async () => ({})); const setChannelPermissionDiscord = vi.fn(async () => ({ ok: true })); @@ -64,6 +65,7 @@ vi.mock("../../discord/send.js", () => ({ removeReactionDiscord: (...args: unknown[]) => removeReactionDiscord(...args), searchMessagesDiscord: (...args: unknown[]) => searchMessagesDiscord(...args), sendMessageDiscord: (...args: unknown[]) => sendMessageDiscord(...args), + sendVoiceMessageDiscord: (...args: unknown[]) => sendVoiceMessageDiscord(...args), sendPollDiscord: (...args: unknown[]) => sendPollDiscord(...args), sendStickerDiscord: (...args: unknown[]) => sendStickerDiscord(...args), setChannelPermissionDiscord: (...args: unknown[]) => setChannelPermissionDiscord(...args), @@ -235,6 +237,43 @@ describe("handleDiscordMessagingAction", () => { ); }); + it("sends voice messages from a local file path", async () => { + sendVoiceMessageDiscord.mockClear(); + sendMessageDiscord.mockClear(); + + await handleDiscordMessagingAction( + "sendMessage", + { + to: "channel:123", + path: "/tmp/voice.mp3", + asVoice: true, + silent: true, + }, + enableAllActions, + ); + + expect(sendVoiceMessageDiscord).toHaveBeenCalledWith("channel:123", "/tmp/voice.mp3", { + replyTo: undefined, + silent: true, + }); + expect(sendMessageDiscord).not.toHaveBeenCalled(); + }); + + it("rejects voice messages that include content", async () => { + await expect( + handleDiscordMessagingAction( + "sendMessage", + { + to: "channel:123", + mediaUrl: "/tmp/voice.mp3", + asVoice: true, + content: "hello", + }, + enableAllActions, + ), + ).rejects.toThrow(/Voice messages cannot include text content/); + }); + it("forwards optional thread content", async () => { createThreadDiscord.mockClear(); await handleDiscordMessagingAction( diff --git a/src/cli/program/message/register.send.ts b/src/cli/program/message/register.send.ts index 4ab3a852f..360e5bcc0 100644 --- a/src/cli/program/message/register.send.ts +++ b/src/cli/program/message/register.send.ts @@ -23,7 +23,11 @@ export function registerMessageSendCommand(message: Command, helpers: MessageCli .option("--reply-to ", "Reply-to message id") .option("--thread-id ", "Thread id (Telegram forum thread)") .option("--gif-playback", "Treat video media as GIF playback (WhatsApp only).", false) - .option("--silent", "Send message silently without notification (Telegram only)", false), + .option( + "--silent", + "Send message silently without notification (Telegram + Discord)", + false, + ), ) .action(async (opts) => { await helpers.runMessageAction("send", opts); diff --git a/src/discord/voice-message.ts b/src/discord/voice-message.ts index 98d1d8dd0..d03aa98ac 100644 --- a/src/discord/voice-message.ts +++ b/src/discord/voice-message.ts @@ -50,7 +50,9 @@ export async function getAudioDuration(filePath: string): Promise { } return Math.round(duration * 100) / 100; // Round to 2 decimal places } catch (err) { - throw new Error(`Failed to get audio duration: ${err instanceof Error ? err.message : err}`); + throw new Error(`Failed to get audio duration: ${err instanceof Error ? err.message : err}`, { + cause: err, + }); } } @@ -104,7 +106,7 @@ async function generateWaveformFromPcm(filePath: string): Promise { let sum = 0; let count = 0; for (let j = 0; j < step && i * step + j < samples.length; j++) { - sum += Math.abs(samples[i * step + j]!); + sum += Math.abs(samples[i * step + j]); count++; } const avg = count > 0 ? sum / count : 0; @@ -225,39 +227,27 @@ export async function sendDiscordVoiceMessage( metadata: VoiceMessageMetadata, replyTo: string | undefined, request: RetryRunner, - token: string, silent?: boolean, ): Promise<{ id: string; channel_id: string }> { const filename = "voice-message.ogg"; const fileSize = audioBuffer.byteLength; - // Step 1: Request upload URL (using fetch directly for proper Content-Type header) - // Wrapped in retry runner for consistency with other Discord API calls - const uploadUrlResponse = await request(async () => { - const res = await fetch(`https://discord.com/api/v10/channels/${channelId}/attachments`, { - method: "POST", - headers: { - "Content-Type": "application/json", - Authorization: `Bot ${token}`, - }, - body: JSON.stringify({ - files: [ - { - filename, - file_size: fileSize, - id: "0", - }, - ], - }), - }); - - if (!res.ok) { - const errorBody = await res.text(); - throw new Error(`Failed to get upload URL: ${res.status} ${errorBody}`); - } - - return (await res.json()) as UploadUrlResponse; - }, "voice-upload-url"); + // Step 1: Request upload URL from Discord + const uploadUrlResponse = await request( + () => + rest.post(`/channels/${channelId}/attachments`, { + body: { + files: [ + { + filename, + file_size: fileSize, + id: "0", + }, + ], + }, + }) as Promise, + "voice-upload-url", + ); if (!uploadUrlResponse.attachments?.[0]) { throw new Error("Failed to get upload URL for voice message");