diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cd163696..804ec6e78 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ Docs: https://docs.clawd.bot - CLI: explain when auth profiles are excluded by auth.order in probe details. - CLI: drop the em dash when the banner tagline wraps to a second line. - CLI: inline auth probe errors in status rows to reduce wrapping. +- Telegram: render markdown in media captions. (#1478) - Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests. - Daemon: use platform PATH delimiters when building minimal service paths. - Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts. diff --git a/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts b/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts index fd9401dac..63ddd9bec 100644 --- a/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts +++ b/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts @@ -363,6 +363,7 @@ describe("createTelegramBot", () => { expect(sendAnimationSpy).toHaveBeenCalledTimes(1); expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), { caption: "caption", + parse_mode: "HTML", reply_to_message_id: undefined, }); expect(sendPhotoSpy).not.toHaveBeenCalled(); diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts index d4cdfaf4b..cb1ee3381 100644 --- a/src/telegram/bot.test.ts +++ b/src/telegram/bot.test.ts @@ -1392,6 +1392,7 @@ describe("createTelegramBot", () => { expect(sendAnimationSpy).toHaveBeenCalledTimes(1); expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), { caption: "caption", + parse_mode: "HTML", reply_to_message_id: undefined, }); expect(sendPhotoSpy).not.toHaveBeenCalled(); diff --git a/src/telegram/bot/delivery.test.ts b/src/telegram/bot/delivery.test.ts index 65328af90..d9302062e 100644 --- a/src/telegram/bot/delivery.test.ts +++ b/src/telegram/bot/delivery.test.ts @@ -74,4 +74,38 @@ describe("deliverReplies", () => { expect(sendVoice).toHaveBeenCalledTimes(1); expect(events).toEqual(["recordVoice", "sendVoice"]); }); + + it("renders markdown in media captions", async () => { + const runtime = { error: vi.fn(), log: vi.fn() }; + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 2, + chat: { id: "123" }, + }); + const bot = { api: { sendPhoto } } as unknown as Bot; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await deliverReplies({ + replies: [{ mediaUrl: "https://example.com/photo.jpg", text: "hi **boss**" }], + chatId: "123", + token: "tok", + runtime, + bot, + replyToMode: "off", + textLimit: 4000, + }); + + expect(sendPhoto).toHaveBeenCalledWith( + "123", + expect.anything(), + expect.objectContaining({ + caption: "hi boss", + parse_mode: "HTML", + }), + ); + }); }); diff --git a/src/telegram/bot/delivery.ts b/src/telegram/bot/delivery.ts index e05b224da..653474d50 100644 --- a/src/telegram/bot/delivery.ts +++ b/src/telegram/bot/delivery.ts @@ -1,5 +1,9 @@ import { type Bot, InputFile } from "grammy"; -import { markdownToTelegramChunks, markdownToTelegramHtml } from "../format.js"; +import { + markdownToTelegramChunks, + markdownToTelegramHtml, + renderTelegramHtmlText, +} from "../format.js"; import { splitTelegramCaption } from "../caption.js"; import type { ReplyPayload } from "../../auto-reply/types.js"; import type { ReplyToMode } from "../../config/config.js"; @@ -87,6 +91,9 @@ export async function deliverReplies(params: { const { caption, followUpText } = splitTelegramCaption( isFirstMedia ? (reply.text ?? undefined) : undefined, ); + const htmlCaption = caption + ? renderTelegramHtmlText(caption, { tableMode: params.tableMode }) + : undefined; if (followUpText) { pendingFollowUpText = followUpText; } @@ -94,8 +101,9 @@ export async function deliverReplies(params: { const replyToMessageId = replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined; const mediaParams: Record = { - caption, + caption: htmlCaption, reply_to_message_id: replyToMessageId, + ...(htmlCaption ? { parse_mode: "HTML" } : {}), }; if (threadParams) { mediaParams.message_thread_id = threadParams.message_thread_id; @@ -149,14 +157,12 @@ export async function deliverReplies(params: { for (const chunk of chunks) { const replyToMessageIdFollowup = replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined; - await bot.api.sendMessage( - chatId, - chunk.text, - buildTelegramSendParams({ - replyToMessageId: replyToMessageIdFollowup, - messageThreadId, - }), - ); + await sendTelegramText(bot, chatId, chunk.html, runtime, { + replyToMessageId: replyToMessageIdFollowup, + messageThreadId, + textMode: "html", + plainText: chunk.text, + }); if (replyToId && !hasReplied) { hasReplied = true; } diff --git a/src/telegram/format.ts b/src/telegram/format.ts index b0472c69c..472fc1f43 100644 --- a/src/telegram/format.ts +++ b/src/telegram/format.ts @@ -60,6 +60,15 @@ export function markdownToTelegramHtml( return renderTelegramHtml(ir); } +export function renderTelegramHtmlText( + text: string, + options: { textMode?: "markdown" | "html"; tableMode?: MarkdownTableMode } = {}, +): string { + const textMode = options.textMode ?? "markdown"; + if (textMode === "html") return text; + return markdownToTelegramHtml(text, { tableMode: options.tableMode }); +} + export function markdownToTelegramChunks( markdown: string, limit: number, diff --git a/src/telegram/send.caption-split.test.ts b/src/telegram/send.caption-split.test.ts index d625c9da3..58e0a921a 100644 --- a/src/telegram/send.caption-split.test.ts +++ b/src/telegram/send.caption-split.test.ts @@ -87,8 +87,10 @@ describe("sendMessageTelegram caption splitting", () => { expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { caption: undefined, }); - // Then text sent as separate message (plain text, matching caption behavior) - expect(sendMessage).toHaveBeenCalledWith(chatId, longText); + // Then text sent as separate message (HTML formatting) + expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + parse_mode: "HTML", + }); // Returns the text message ID (the "main" content) expect(res.messageId).toBe("71"); }); @@ -123,12 +125,43 @@ describe("sendMessageTelegram caption splitting", () => { // Caption should be included with media expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { caption: shortText, + parse_mode: "HTML", }); // No separate text message needed expect(sendMessage).not.toHaveBeenCalled(); expect(res.messageId).toBe("72"); }); + it("renders markdown in media captions", async () => { + const chatId = "123"; + const caption = "hi **boss**"; + + const sendPhoto = vi.fn().mockResolvedValue({ + message_id: 90, + chat: { id: chatId }, + }); + const api = { sendPhoto } as unknown as { + sendPhoto: typeof sendPhoto; + }; + + loadWebMedia.mockResolvedValueOnce({ + buffer: Buffer.from("fake-image"), + contentType: "image/jpeg", + fileName: "photo.jpg", + }); + + await sendMessageTelegram(chatId, caption, { + token: "tok", + api, + mediaUrl: "https://example.com/photo.jpg", + }); + + expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { + caption: "hi boss", + parse_mode: "HTML", + }); + }); + it("preserves thread params when splitting long captions", async () => { const chatId = "-1001234567890"; const longText = "C".repeat(1100); @@ -166,8 +199,9 @@ describe("sendMessageTelegram caption splitting", () => { message_thread_id: 271, reply_to_message_id: 500, }); - // Text message also includes thread params (plain text, matching caption behavior) + // Text message also includes thread params (HTML formatting) expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + parse_mode: "HTML", message_thread_id: 271, reply_to_message_id: 500, }); @@ -209,6 +243,7 @@ describe("sendMessageTelegram caption splitting", () => { }); // Follow-up text has the reply_markup expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + parse_mode: "HTML", reply_markup: { inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], }, @@ -253,6 +288,7 @@ describe("sendMessageTelegram caption splitting", () => { reply_to_message_id: 500, }); expect(sendMessage).toHaveBeenCalledWith(chatId, longText, { + parse_mode: "HTML", message_thread_id: 271, reply_to_message_id: 500, reply_markup: { @@ -353,6 +389,7 @@ describe("sendMessageTelegram caption splitting", () => { // Media sent WITH reply_markup when not splitting expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { caption: shortText, + parse_mode: "HTML", reply_markup: { inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]], }, diff --git a/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts b/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts index 55d55d47b..18176d259 100644 --- a/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts +++ b/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts @@ -94,6 +94,7 @@ describe("buildInlineKeyboard", () => { expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), { caption: "photo in topic", + parse_mode: "HTML", message_thread_id: 99, }); }); diff --git a/src/telegram/send.returns-undefined-empty-input.test.ts b/src/telegram/send.returns-undefined-empty-input.test.ts index 22a85eb3d..bd83d7461 100644 --- a/src/telegram/send.returns-undefined-empty-input.test.ts +++ b/src/telegram/send.returns-undefined-empty-input.test.ts @@ -285,6 +285,7 @@ describe("sendMessageTelegram", () => { expect(sendAnimation).toHaveBeenCalledTimes(1); expect(sendAnimation).toHaveBeenCalledWith(chatId, expect.anything(), { caption: "caption", + parse_mode: "HTML", }); expect(res.messageId).toBe("9"); }); @@ -318,6 +319,7 @@ describe("sendMessageTelegram", () => { expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), { caption: "caption", + parse_mode: "HTML", }); expect(sendVoice).not.toHaveBeenCalled(); }); @@ -354,6 +356,7 @@ describe("sendMessageTelegram", () => { expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), { caption: "voice note", + parse_mode: "HTML", message_thread_id: 271, reply_to_message_id: 500, }); @@ -390,6 +393,7 @@ describe("sendMessageTelegram", () => { expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), { caption: "caption", + parse_mode: "HTML", }); expect(sendVoice).not.toHaveBeenCalled(); }); diff --git a/src/telegram/send.ts b/src/telegram/send.ts index 01120d354..0274f0b72 100644 --- a/src/telegram/send.ts +++ b/src/telegram/send.ts @@ -16,7 +16,7 @@ import { isGifMedia } from "../media/mime.js"; import { loadWebMedia } from "../web/media.js"; import { resolveTelegramAccount } from "./accounts.js"; import { resolveTelegramFetch } from "./fetch.js"; -import { markdownToTelegramHtml } from "./format.js"; +import { renderTelegramHtmlText } from "./format.js"; import { resolveMarkdownTableMode } from "../config/markdown-tables.js"; import { splitTelegramCaption } from "./caption.js"; import { recordSentMessage } from "./sent-message-cache.js"; @@ -190,6 +190,55 @@ export async function sendMessageTelegram( ); }; + const textMode = opts.textMode ?? "markdown"; + const tableMode = resolveMarkdownTableMode({ + cfg, + channel: "telegram", + accountId: account.accountId, + }); + const renderHtmlText = (value: string) => renderTelegramHtmlText(value, { textMode, tableMode }); + + const sendTelegramText = async ( + rawText: string, + params?: Record, + fallbackText?: string, + ) => { + const htmlText = renderHtmlText(rawText); + const sendParams = params + ? { + parse_mode: "HTML" as const, + ...params, + } + : { + parse_mode: "HTML" as const, + }; + const res = await request(() => api.sendMessage(chatId, htmlText, sendParams), "message").catch( + async (err) => { + // Telegram rejects malformed HTML (e.g., unsupported tags or entities). + // When that happens, fall back to plain text so the message still delivers. + const errText = formatErrorMessage(err); + if (PARSE_ERR_RE.test(errText)) { + if (opts.verbose) { + console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`); + } + const fallback = fallbackText ?? rawText; + const plainParams = params && Object.keys(params).length > 0 ? { ...params } : undefined; + return await request( + () => + plainParams + ? api.sendMessage(chatId, fallback, plainParams) + : api.sendMessage(chatId, fallback), + "message-plain", + ).catch((err2) => { + throw wrapChatNotFound(err2); + }); + } + throw wrapChatNotFound(err); + }, + ); + return res; + }; + if (mediaUrl) { const media = await loadWebMedia(mediaUrl, opts.maxBytes); const kind = mediaKindFromMime(media.contentType ?? undefined); @@ -200,21 +249,21 @@ export async function sendMessageTelegram( const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file"; const file = new InputFile(media.buffer, fileName); const { caption, followUpText } = splitTelegramCaption(text); + const htmlCaption = caption ? renderHtmlText(caption) : undefined; // If text exceeds Telegram's caption limit, send media without caption // then send text as a separate follow-up message. const needsSeparateText = Boolean(followUpText); // When splitting, put reply_markup only on the follow-up text (the "main" content), // not on the media message. - const mediaParams = hasThreadParams - ? { - caption, - ...threadParams, - ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), - } - : { - caption, - ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), - }; + const baseMediaParams = { + ...(hasThreadParams ? threadParams : {}), + ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}), + }; + const mediaParams = { + caption: htmlCaption, + ...(htmlCaption ? { parse_mode: "HTML" as const } : {}), + ...baseMediaParams, + }; let result: | Awaited> | Awaited> @@ -279,7 +328,7 @@ export async function sendMessageTelegram( }); // If text was too long for a caption, send it as a separate follow-up message. - // Use plain text to match caption behavior (captions don't use HTML conversion). + // Use HTML conversion so markdown renders like captions. if (needsSeparateText && followUpText) { const textParams = hasThreadParams || replyMarkup @@ -288,15 +337,7 @@ export async function sendMessageTelegram( ...(replyMarkup ? { reply_markup: replyMarkup } : {}), } : undefined; - const textRes = await request( - () => - textParams - ? api.sendMessage(chatId, followUpText, textParams) - : api.sendMessage(chatId, followUpText), - "message", - ).catch((err) => { - throw wrapChatNotFound(err); - }); + const textRes = await sendTelegramText(followUpText, textParams); // Return the text message ID as the "main" message (it's the actual content). return { messageId: String(textRes?.message_id ?? mediaMessageId), @@ -310,53 +351,14 @@ export async function sendMessageTelegram( if (!text || !text.trim()) { throw new Error("Message must be non-empty for Telegram sends"); } - const textMode = opts.textMode ?? "markdown"; - const tableMode = resolveMarkdownTableMode({ - cfg, - channel: "telegram", - accountId: account.accountId, - }); - const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text, { tableMode }); - const textParams = hasThreadParams - ? { - parse_mode: "HTML" as const, - ...threadParams, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), - } - : { - parse_mode: "HTML" as const, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), - }; - const res = await request(() => api.sendMessage(chatId, htmlText, textParams), "message").catch( - async (err) => { - // Telegram rejects malformed HTML (e.g., unsupported tags or entities). - // When that happens, fall back to plain text so the message still delivers. - const errText = formatErrorMessage(err); - if (PARSE_ERR_RE.test(errText)) { - if (opts.verbose) { - console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`); + const textParams = + hasThreadParams || replyMarkup + ? { + ...threadParams, + ...(replyMarkup ? { reply_markup: replyMarkup } : {}), } - const plainParams = - hasThreadParams || replyMarkup - ? { - ...threadParams, - ...(replyMarkup ? { reply_markup: replyMarkup } : {}), - } - : undefined; - const fallbackText = opts.plainText ?? text; - return await request( - () => - plainParams - ? api.sendMessage(chatId, fallbackText, plainParams) - : api.sendMessage(chatId, fallbackText), - "message-plain", - ).catch((err2) => { - throw wrapChatNotFound(err2); - }); - } - throw wrapChatNotFound(err); - }, - ); + : undefined; + const res = await sendTelegramText(text, textParams, opts.plainText); const messageId = String(res?.message_id ?? "unknown"); if (res?.message_id) { recordSentMessage(chatId, res.message_id);