import type { MarkdownTableMode } from "../config/types.base.js";
import {
  chunkMarkdownIR,
  markdownToIR,
  type MarkdownLinkSpan,
  type MarkdownIR,
} from "../markdown/ir.js";
import { renderMarkdownWithMarkers } from "../markdown/render.js";

export type TelegramFormattedChunk = {
  html: string;
  text: string;
};

/** Escapes the characters that are significant in HTML text content (`&`, `<`, `>`). */
function escapeHtml(text: string): string {
  // `&` must be replaced first so the entities produced below are not double-escaped.
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}

/** Escapes text for use inside a double-quoted HTML attribute value. */
function escapeHtmlAttr(text: string): string {
  return escapeHtml(text).replace(/"/g, "&quot;");
}

/**
 * File extensions that share TLDs and commonly appear in code/documentation.
 * These are wrapped in <code> tags to prevent Telegram from generating
 * spurious domain registrar previews.
 *
 * Only includes extensions that are:
 * 1. Commonly used as file extensions in code/docs
 * 2. Rarely used as intentional domain references
 *
 * Excluded: .ai, .io, .tv, .fm (popular domain TLDs like x.ai, vercel.io, github.io)
 */
const FILE_EXTENSIONS_WITH_TLD = new Set([
  "md", // Markdown (Moldova) - very common in repos
  "go", // Go language - common in Go projects
  "py", // Python (Paraguay) - common in Python projects
  "pl", // Perl (Poland) - common in Perl projects
  "sh", // Shell (Saint Helena) - common for scripts
  "am", // Automake files (Armenia)
  "at", // Assembly (Austria)
  "be", // Backend files (Belgium)
  "cc", // C++ source (Cocos Islands)
]);

/**
 * Detects when markdown-it linkify auto-generated a link from a bare filename
 * (e.g. README.md → http://README.md).
 *
 * @param href - Link target as produced by the markdown parser.
 * @param label - Visible link text.
 * @returns `true` when the href is just the label with an `http(s)://` prefix,
 *   the label ends in one of the file extensions in `FILE_EXTENSIONS_WITH_TLD`,
 *   and no directory segment of the label contains a dot.
 */
function isAutoLinkedFileRef(href: string, label: string): boolean {
  const stripped = href.replace(/^https?:\/\//i, "");
  if (stripped !== label) {
    return false;
  }
  const dotIndex = label.lastIndexOf(".");
  // Require at least one character before the dot (rejects ".md" and dot-less labels).
  if (dotIndex < 1) {
    return false;
  }
  const ext = label.slice(dotIndex + 1).toLowerCase();
  if (!FILE_EXTENSIONS_WITH_TLD.has(ext)) {
    return false;
  }
  // Reject if any path segment before the filename contains a dot (looks like a domain).
  const leadingSegments = label.split("/").slice(0, -1);
  return !leadingSegments.some((segment) => segment.includes("."));
}

/**
 * Builds the anchor markers for one link span, or `null` when no anchor should be emitted.
 *
 * @param link - Link span (href plus start/end offsets into `text`).
 * @param text - The plain text the span offsets refer to.
 * @returns Open/close markers for the span, or `null` for empty hrefs, zero-length
 *   spans, and auto-linkified file references.
 */
function buildTelegramLink(link: MarkdownLinkSpan, text: string) {
  const href = link.href.trim();
  if (!href) {
    return null;
  }
  if (link.start === link.end) {
    return null;
  }
  // Suppress auto-linkified file references (e.g. README.md → http://README.md)
  const label = text.slice(link.start, link.end);
  if (isAutoLinkedFileRef(href, label)) {
    return null;
  }
  const safeHref = escapeHtmlAttr(href);
  return {
    start: link.start,
    end: link.end,
    open: `<a href="${safeHref}">`,
    close: "</a>",
  };
}

/** Renders a markdown IR to HTML using the tag subset supported by Telegram's HTML parse mode. */
function renderTelegramHtml(ir: MarkdownIR): string {
  return renderMarkdownWithMarkers(ir, {
    styleMarkers: {
      bold: { open: "<b>", close: "</b>" },
      italic: { open: "<i>", close: "</i>" },
      strikethrough: { open: "<s>", close: "</s>" },
      code: { open: "<code>", close: "</code>" },
      code_block: { open: "<pre><code>", close: "</code></pre>" },
      spoiler: { open: "<tg-spoiler>", close: "</tg-spoiler>" },
      blockquote: { open: "<blockquote>", close: "</blockquote>" },
    },
    escapeText: escapeHtml,
    buildLink: buildTelegramLink,
  });
}

/**
 * Converts markdown to Telegram-flavoured HTML.
 *
 * @param markdown - Markdown source; a nullish value is treated as an empty string.
 * @param options - `tableMode` is forwarded to the markdown parser. `wrapFileRefs`
 *   controls the file-reference wrapping post-pass and is applied unless it is
 *   explicitly `false`.
 * @returns The rendered HTML string.
 */
export function markdownToTelegramHtml(
  markdown: string,
  options: { tableMode?: MarkdownTableMode; wrapFileRefs?: boolean } = {},
): string {
  const ir = markdownToIR(markdown ?? "", {
    linkify: true,
    enableSpoilers: true,
    headingStyle: "none",
    blockquotePrefix: "",
    tableMode: options.tableMode,
  });
  const html = renderTelegramHtml(ir);
  // Apply file reference wrapping unless the caller opted out (e.g. for chunked rendering).
  if (options.wrapFileRefs !== false) {
    return wrapFileReferencesInHtml(html);
  }
  return html;
}

/**
 * File-reference wrapping (implemented by wrapFileReferencesInHtml and its helpers).
 *
 * Wraps standalone file references (with TLD extensions) in <code> tags.
 * This prevents Telegram from treating them as URLs and generating
 * irrelevant domain registrar previews.
 *
 * Runs AFTER markdown→HTML conversion to avoid modifying HTML attributes.
 * Skips content inside <code>, <pre>, and <a> tags to avoid nesting issues.
 */
/** Backslash-escapes every regex metacharacter so the string matches literally inside a RegExp. */
function escapeRegex(str: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return str.replace(metaChars, (ch) => `\\${ch}`);
}

// Alternation of the known file extensions, e.g. "md|go|py|...", safe to embed in a RegExp.
const FILE_EXTENSIONS_PATTERN = Array.from(FILE_EXTENSIONS_WITH_TLD).map(escapeRegex).join("|");

// Matches an anchor whose visible label equals its href minus the http(s):// scheme,
// i.e. a link that linkify generated from bare text. Group 1 is the label.
const AUTO_LINKED_ANCHOR_PATTERN = /<a\s+href="https?:\/\/([^"]+)"[^>]*>\1<\/a>/gi;

// Matches a path-like token ending in a known extension.
// Group 1: the single delimiter character before the token (or empty at start of input).
// Group 2: the file reference itself.
// The lookahead requires the token to end at a delimiter or at end of input.
const FILE_REFERENCE_PATTERN = new RegExp(
  `(^|[^a-zA-Z0-9_\\-/])([a-zA-Z0-9_.\\-/]+\\.(?:${FILE_EXTENSIONS_PATTERN}))(?=$|[^a-zA-Z0-9_\\-/])`,
  "gi",
);

// Matches a single letter followed by a known extension (e.g. "R.md") that is not
// part of a longer alphanumeric token. Group 1: preceding delimiter; group 2: the token.
const ORPHANED_TLD_PATTERN = new RegExp(
  `([^a-zA-Z0-9]|^)([A-Za-z]\\.(?:${FILE_EXTENSIONS_PATTERN}))(?=[^a-zA-Z0-9/]|$)`,
  "g",
);

// Matches one HTML tag. Group 1 is "<" or "</"; group 2 is the tag name.
const HTML_TAG_PATTERN = /(<\/?)([a-zA-Z][a-zA-Z0-9-]*)\b[^>]*?>/gi;

/**
 * Replacer for FILE_REFERENCE_PATTERN: wraps a matched file reference in <code> tags
 * so Telegram does not treat it as a URL.
 *
 * @param match - The full regex match (prefix + filename).
 * @param prefix - The delimiter captured before the filename (may be empty).
 * @param filename - The matched file reference.
 * @returns The prefix followed by the wrapped, HTML-escaped filename, or `match`
 *   unchanged when the token looks like part of a URL.
 */
function wrapStandaloneFileRef(match: string, prefix: string, filename: string): string {
  // "//host/file.md" is the tail of a scheme-prefixed or protocol-relative URL.
  if (filename.startsWith("//")) {
    return match;
  }
  // Defensive: leave the token alone when the text immediately before it is a URL scheme.
  if (/https?:\/\/$/i.test(prefix)) {
    return match;
  }
  return `${prefix}<code>${escapeHtml(filename)}</code>`;
}

/**
 * Wraps file references found in one text segment (the text between HTML tags).
 *
 * @param text - The text segment to process.
 * @param codeDepth - Current <code> nesting depth; the segment is returned untouched when > 0.
 * @param preDepth - Current <pre> nesting depth; the segment is returned untouched when > 0.
 * @param anchorDepth - Current <a> nesting depth; the segment is returned untouched when > 0.
 * @returns The segment with standalone file references and orphaned single-letter
 *   references (e.g. "R.md") wrapped in <code> tags.
 */
function wrapSegmentFileRefs(
  text: string,
  codeDepth: number,
  preDepth: number,
  anchorDepth: number,
): string {
  if (!text || codeDepth > 0 || preDepth > 0 || anchorDepth > 0) {
    return text;
  }
  const wrappedStandalone = text.replace(FILE_REFERENCE_PATTERN, wrapStandaloneFileRef);
  return wrappedStandalone.replace(ORPHANED_TLD_PATTERN, (match, prefix: string, tld: string) =>
    // A ">" prefix means the token directly follows a tag emitted by the first pass
    // (it is already wrapped), so it must not be wrapped a second time.
    prefix === ">" ? match : `${prefix}<code>${escapeHtml(tld)}</code>`,
  );
}

export function wrapFileReferencesInHtml(html: string): string {
  // Safety-net: de-linkify auto-generated anchors where href="http://