diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5cd163696..804ec6e78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@ Docs: https://docs.clawd.bot
- CLI: explain when auth profiles are excluded by auth.order in probe details.
- CLI: drop the em dash when the banner tagline wraps to a second line.
- CLI: inline auth probe errors in status rows to reduce wrapping.
+- Telegram: render markdown in media captions. (#1478)
- Agents: honor enqueue overrides for embedded runs to avoid queue deadlocks in tests.
- Daemon: use platform PATH delimiters when building minimal service paths.
- Tests: skip embedded runner ordering assertion on Windows to avoid CI timeouts.
diff --git a/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts b/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts
index fd9401dac..63ddd9bec 100644
--- a/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts
+++ b/src/telegram/bot.create-telegram-bot.routes-dms-by-telegram-accountid-binding.test.ts
@@ -363,6 +363,7 @@ describe("createTelegramBot", () => {
expect(sendAnimationSpy).toHaveBeenCalledTimes(1);
expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), {
caption: "caption",
+ parse_mode: "HTML",
reply_to_message_id: undefined,
});
expect(sendPhotoSpy).not.toHaveBeenCalled();
diff --git a/src/telegram/bot.test.ts b/src/telegram/bot.test.ts
index d4cdfaf4b..cb1ee3381 100644
--- a/src/telegram/bot.test.ts
+++ b/src/telegram/bot.test.ts
@@ -1392,6 +1392,7 @@ describe("createTelegramBot", () => {
expect(sendAnimationSpy).toHaveBeenCalledTimes(1);
expect(sendAnimationSpy).toHaveBeenCalledWith("1234", expect.anything(), {
caption: "caption",
+ parse_mode: "HTML",
reply_to_message_id: undefined,
});
expect(sendPhotoSpy).not.toHaveBeenCalled();
diff --git a/src/telegram/bot/delivery.test.ts b/src/telegram/bot/delivery.test.ts
index 65328af90..d9302062e 100644
--- a/src/telegram/bot/delivery.test.ts
+++ b/src/telegram/bot/delivery.test.ts
@@ -74,4 +74,38 @@ describe("deliverReplies", () => {
expect(sendVoice).toHaveBeenCalledTimes(1);
expect(events).toEqual(["recordVoice", "sendVoice"]);
});
+
+  it("renders markdown in media captions", async () => {
+    const runtime = { error: vi.fn(), log: vi.fn() };
+    const sendPhoto = vi.fn().mockResolvedValue({
+      message_id: 2,
+      chat: { id: "123" },
+    });
+    const bot = { api: { sendPhoto } } as unknown as Bot; // only sendPhoto is exercised
+
+    loadWebMedia.mockResolvedValueOnce({
+      buffer: Buffer.from("image"),
+      contentType: "image/jpeg", // JPEG media is expected to be sent via sendPhoto
+      fileName: "photo.jpg",
+    });
+
+    await deliverReplies({
+      replies: [{ mediaUrl: "https://example.com/photo.jpg", text: "hi **boss**" }],
+      chatId: "123",
+      token: "tok",
+      runtime,
+      bot,
+      replyToMode: "off",
+      textLimit: 4000,
+    });
+
+    expect(sendPhoto).toHaveBeenCalledWith(
+      "123",
+      expect.anything(),
+      expect.objectContaining({
+        caption: "hi <b>boss</b>", // markdown bold must arrive as an HTML <b> tag
+        parse_mode: "HTML",
+      }),
+    );
+  });
});
diff --git a/src/telegram/bot/delivery.ts b/src/telegram/bot/delivery.ts
index e05b224da..653474d50 100644
--- a/src/telegram/bot/delivery.ts
+++ b/src/telegram/bot/delivery.ts
@@ -1,5 +1,9 @@
import { type Bot, InputFile } from "grammy";
-import { markdownToTelegramChunks, markdownToTelegramHtml } from "../format.js";
+import {
+ markdownToTelegramChunks,
+ markdownToTelegramHtml,
+ renderTelegramHtmlText,
+} from "../format.js";
import { splitTelegramCaption } from "../caption.js";
import type { ReplyPayload } from "../../auto-reply/types.js";
import type { ReplyToMode } from "../../config/config.js";
@@ -87,6 +91,9 @@ export async function deliverReplies(params: {
const { caption, followUpText } = splitTelegramCaption(
isFirstMedia ? (reply.text ?? undefined) : undefined,
);
+ const htmlCaption = caption
+ ? renderTelegramHtmlText(caption, { tableMode: params.tableMode })
+ : undefined;
if (followUpText) {
pendingFollowUpText = followUpText;
}
@@ -94,8 +101,9 @@ export async function deliverReplies(params: {
const replyToMessageId =
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
const mediaParams: Record = {
- caption,
+ caption: htmlCaption,
reply_to_message_id: replyToMessageId,
+ ...(htmlCaption ? { parse_mode: "HTML" } : {}),
};
if (threadParams) {
mediaParams.message_thread_id = threadParams.message_thread_id;
@@ -149,14 +157,12 @@ export async function deliverReplies(params: {
for (const chunk of chunks) {
const replyToMessageIdFollowup =
replyToId && (replyToMode === "all" || !hasReplied) ? replyToId : undefined;
- await bot.api.sendMessage(
- chatId,
- chunk.text,
- buildTelegramSendParams({
- replyToMessageId: replyToMessageIdFollowup,
- messageThreadId,
- }),
- );
+ await sendTelegramText(bot, chatId, chunk.html, runtime, {
+ replyToMessageId: replyToMessageIdFollowup,
+ messageThreadId,
+ textMode: "html",
+ plainText: chunk.text,
+ });
if (replyToId && !hasReplied) {
hasReplied = true;
}
diff --git a/src/telegram/format.ts b/src/telegram/format.ts
index b0472c69c..472fc1f43 100644
--- a/src/telegram/format.ts
+++ b/src/telegram/format.ts
@@ -60,6 +60,15 @@ export function markdownToTelegramHtml(
return renderTelegramHtml(ir);
}
+export function renderTelegramHtmlText(
+  text: string,
+  options: { textMode?: "markdown" | "html"; tableMode?: MarkdownTableMode } = {},
+): string {
+  const { textMode = "markdown", tableMode } = options;
+  // Text already in HTML mode is returned untouched; anything else is treated as markdown.
+  return textMode === "html" ? text : markdownToTelegramHtml(text, { tableMode });
+}
+
export function markdownToTelegramChunks(
markdown: string,
limit: number,
diff --git a/src/telegram/send.caption-split.test.ts b/src/telegram/send.caption-split.test.ts
index d625c9da3..58e0a921a 100644
--- a/src/telegram/send.caption-split.test.ts
+++ b/src/telegram/send.caption-split.test.ts
@@ -87,8 +87,10 @@ describe("sendMessageTelegram caption splitting", () => {
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: undefined,
});
- // Then text sent as separate message (plain text, matching caption behavior)
- expect(sendMessage).toHaveBeenCalledWith(chatId, longText);
+ // Then text sent as separate message (HTML formatting)
+ expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
+ parse_mode: "HTML",
+ });
// Returns the text message ID (the "main" content)
expect(res.messageId).toBe("71");
});
@@ -123,12 +125,43 @@ describe("sendMessageTelegram caption splitting", () => {
// Caption should be included with media
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: shortText,
+ parse_mode: "HTML",
});
// No separate text message needed
expect(sendMessage).not.toHaveBeenCalled();
expect(res.messageId).toBe("72");
});
+  it("renders markdown in media captions", async () => {
+    const chatId = "123";
+    const caption = "hi **boss**"; // markdown input; short enough to stay a caption
+
+    const sendPhoto = vi.fn().mockResolvedValue({
+      message_id: 90,
+      chat: { id: chatId },
+    });
+    const api = { sendPhoto } as unknown as {
+      sendPhoto: typeof sendPhoto;
+    };
+
+    loadWebMedia.mockResolvedValueOnce({
+      buffer: Buffer.from("fake-image"),
+      contentType: "image/jpeg",
+      fileName: "photo.jpg",
+    });
+
+    await sendMessageTelegram(chatId, caption, {
+      token: "tok",
+      api,
+      mediaUrl: "https://example.com/photo.jpg",
+    });
+
+    expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
+      caption: "hi <b>boss</b>", // markdown bold must arrive as an HTML <b> tag
+      parse_mode: "HTML",
+    });
+  });
+
it("preserves thread params when splitting long captions", async () => {
const chatId = "-1001234567890";
const longText = "C".repeat(1100);
@@ -166,8 +199,9 @@ describe("sendMessageTelegram caption splitting", () => {
message_thread_id: 271,
reply_to_message_id: 500,
});
- // Text message also includes thread params (plain text, matching caption behavior)
+ // Text message also includes thread params (HTML formatting)
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
+ parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
});
@@ -209,6 +243,7 @@ describe("sendMessageTelegram caption splitting", () => {
});
// Follow-up text has the reply_markup
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
+ parse_mode: "HTML",
reply_markup: {
inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]],
},
@@ -253,6 +288,7 @@ describe("sendMessageTelegram caption splitting", () => {
reply_to_message_id: 500,
});
expect(sendMessage).toHaveBeenCalledWith(chatId, longText, {
+ parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
reply_markup: {
@@ -353,6 +389,7 @@ describe("sendMessageTelegram caption splitting", () => {
// Media sent WITH reply_markup when not splitting
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: shortText,
+ parse_mode: "HTML",
reply_markup: {
inline_keyboard: [[{ text: "Click me", callback_data: "action:click" }]],
},
diff --git a/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts b/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts
index 55d55d47b..18176d259 100644
--- a/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts
+++ b/src/telegram/send.preserves-thread-params-plain-text-fallback.test.ts
@@ -94,6 +94,7 @@ describe("buildInlineKeyboard", () => {
expect(sendPhoto).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "photo in topic",
+ parse_mode: "HTML",
message_thread_id: 99,
});
});
diff --git a/src/telegram/send.returns-undefined-empty-input.test.ts b/src/telegram/send.returns-undefined-empty-input.test.ts
index 22a85eb3d..bd83d7461 100644
--- a/src/telegram/send.returns-undefined-empty-input.test.ts
+++ b/src/telegram/send.returns-undefined-empty-input.test.ts
@@ -285,6 +285,7 @@ describe("sendMessageTelegram", () => {
expect(sendAnimation).toHaveBeenCalledTimes(1);
expect(sendAnimation).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption",
+ parse_mode: "HTML",
});
expect(res.messageId).toBe("9");
});
@@ -318,6 +319,7 @@ describe("sendMessageTelegram", () => {
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption",
+ parse_mode: "HTML",
});
expect(sendVoice).not.toHaveBeenCalled();
});
@@ -354,6 +356,7 @@ describe("sendMessageTelegram", () => {
expect(sendVoice).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "voice note",
+ parse_mode: "HTML",
message_thread_id: 271,
reply_to_message_id: 500,
});
@@ -390,6 +393,7 @@ describe("sendMessageTelegram", () => {
expect(sendAudio).toHaveBeenCalledWith(chatId, expect.anything(), {
caption: "caption",
+ parse_mode: "HTML",
});
expect(sendVoice).not.toHaveBeenCalled();
});
diff --git a/src/telegram/send.ts b/src/telegram/send.ts
index 01120d354..0274f0b72 100644
--- a/src/telegram/send.ts
+++ b/src/telegram/send.ts
@@ -16,7 +16,7 @@ import { isGifMedia } from "../media/mime.js";
import { loadWebMedia } from "../web/media.js";
import { resolveTelegramAccount } from "./accounts.js";
import { resolveTelegramFetch } from "./fetch.js";
-import { markdownToTelegramHtml } from "./format.js";
+import { renderTelegramHtmlText } from "./format.js";
import { resolveMarkdownTableMode } from "../config/markdown-tables.js";
import { splitTelegramCaption } from "./caption.js";
import { recordSentMessage } from "./sent-message-cache.js";
@@ -190,6 +190,55 @@ export async function sendMessageTelegram(
);
};
+ const textMode = opts.textMode ?? "markdown";
+ const tableMode = resolveMarkdownTableMode({
+ cfg,
+ channel: "telegram",
+ accountId: account.accountId,
+ });
+ const renderHtmlText = (value: string) => renderTelegramHtmlText(value, { textMode, tableMode });
+
+  // Sends `rawText` as a Telegram message rendered through renderHtmlText with
+  // parse_mode "HTML". If the request fails with an error matching PARSE_ERR_RE,
+  // it is retried once as plain text using `fallbackText ?? rawText` and the
+  // caller's params (parse_mode is not added on the retry). Any other failure,
+  // and a failure of the retry itself, is rethrown via wrapChatNotFound.
+  const sendTelegramText = async (
+    rawText: string,
+    params?: Record<string, unknown>,
+    fallbackText?: string,
+  ) => {
+    const htmlText = renderHtmlText(rawText);
+    // Spreading an undefined `params` adds nothing, so one literal covers both cases.
+    const sendParams = { parse_mode: "HTML" as const, ...params };
+    const res = await request(() => api.sendMessage(chatId, htmlText, sendParams), "message").catch(
+      async (err) => {
+        // Telegram rejects malformed HTML (e.g., unsupported tags or entities).
+        // When that happens, fall back to plain text so the message still delivers.
+        const errText = formatErrorMessage(err);
+        if (PARSE_ERR_RE.test(errText)) {
+          if (opts.verbose) {
+            console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
+          }
+          const fallback = fallbackText ?? rawText;
+          // Pass no options object at all when the caller supplied none (or an empty one).
+          const plainParams = params && Object.keys(params).length > 0 ? { ...params } : undefined;
+          return await request(
+            () =>
+              plainParams
+                ? api.sendMessage(chatId, fallback, plainParams)
+                : api.sendMessage(chatId, fallback),
+            "message-plain",
+          ).catch((err2) => {
+            throw wrapChatNotFound(err2);
+          });
+        }
+        throw wrapChatNotFound(err);
+      },
+    );
+    return res;
+  };
+
if (mediaUrl) {
const media = await loadWebMedia(mediaUrl, opts.maxBytes);
const kind = mediaKindFromMime(media.contentType ?? undefined);
@@ -200,21 +249,21 @@ export async function sendMessageTelegram(
const fileName = media.fileName ?? (isGif ? "animation.gif" : inferFilename(kind)) ?? "file";
const file = new InputFile(media.buffer, fileName);
const { caption, followUpText } = splitTelegramCaption(text);
+ const htmlCaption = caption ? renderHtmlText(caption) : undefined;
// If text exceeds Telegram's caption limit, send media without caption
// then send text as a separate follow-up message.
const needsSeparateText = Boolean(followUpText);
// When splitting, put reply_markup only on the follow-up text (the "main" content),
// not on the media message.
- const mediaParams = hasThreadParams
- ? {
- caption,
- ...threadParams,
- ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
- }
- : {
- caption,
- ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
- };
+ const baseMediaParams = {
+ ...(hasThreadParams ? threadParams : {}),
+ ...(!needsSeparateText && replyMarkup ? { reply_markup: replyMarkup } : {}),
+ };
+ const mediaParams = {
+ caption: htmlCaption,
+ ...(htmlCaption ? { parse_mode: "HTML" as const } : {}),
+ ...baseMediaParams,
+ };
let result:
| Awaited>
| Awaited>
@@ -279,7 +328,7 @@ export async function sendMessageTelegram(
});
// If text was too long for a caption, send it as a separate follow-up message.
- // Use plain text to match caption behavior (captions don't use HTML conversion).
+ // Use HTML conversion so markdown renders like captions.
if (needsSeparateText && followUpText) {
const textParams =
hasThreadParams || replyMarkup
@@ -288,15 +337,7 @@ export async function sendMessageTelegram(
...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
: undefined;
- const textRes = await request(
- () =>
- textParams
- ? api.sendMessage(chatId, followUpText, textParams)
- : api.sendMessage(chatId, followUpText),
- "message",
- ).catch((err) => {
- throw wrapChatNotFound(err);
- });
+ const textRes = await sendTelegramText(followUpText, textParams);
// Return the text message ID as the "main" message (it's the actual content).
return {
messageId: String(textRes?.message_id ?? mediaMessageId),
@@ -310,53 +351,14 @@ export async function sendMessageTelegram(
if (!text || !text.trim()) {
throw new Error("Message must be non-empty for Telegram sends");
}
- const textMode = opts.textMode ?? "markdown";
- const tableMode = resolveMarkdownTableMode({
- cfg,
- channel: "telegram",
- accountId: account.accountId,
- });
- const htmlText = textMode === "html" ? text : markdownToTelegramHtml(text, { tableMode });
- const textParams = hasThreadParams
- ? {
- parse_mode: "HTML" as const,
- ...threadParams,
- ...(replyMarkup ? { reply_markup: replyMarkup } : {}),
- }
- : {
- parse_mode: "HTML" as const,
- ...(replyMarkup ? { reply_markup: replyMarkup } : {}),
- };
- const res = await request(() => api.sendMessage(chatId, htmlText, textParams), "message").catch(
- async (err) => {
- // Telegram rejects malformed HTML (e.g., unsupported tags or entities).
- // When that happens, fall back to plain text so the message still delivers.
- const errText = formatErrorMessage(err);
- if (PARSE_ERR_RE.test(errText)) {
- if (opts.verbose) {
- console.warn(`telegram HTML parse failed, retrying as plain text: ${errText}`);
+ const textParams =
+ hasThreadParams || replyMarkup
+ ? {
+ ...threadParams,
+ ...(replyMarkup ? { reply_markup: replyMarkup } : {}),
}
- const plainParams =
- hasThreadParams || replyMarkup
- ? {
- ...threadParams,
- ...(replyMarkup ? { reply_markup: replyMarkup } : {}),
- }
- : undefined;
- const fallbackText = opts.plainText ?? text;
- return await request(
- () =>
- plainParams
- ? api.sendMessage(chatId, fallbackText, plainParams)
- : api.sendMessage(chatId, fallbackText),
- "message-plain",
- ).catch((err2) => {
- throw wrapChatNotFound(err2);
- });
- }
- throw wrapChatNotFound(err);
- },
- );
+ : undefined;
+ const res = await sendTelegramText(text, textParams, opts.plainText);
const messageId = String(res?.message_id ?? "unknown");
if (res?.message_id) {
recordSentMessage(chatId, res.message_id);