Files
Moltbot/src/signal/format.ts
Hudson 1d6abddb9f fix(signal): outbound formatting and markdown IR rendering improvements (#9781)
* fix: Signal and markdown formatting improvements

Markdown IR fixes:
- Fix list-paragraph spacing (extra newline between list items and following paragraphs)
- Fix nested list indentation and newline handling
- Fix blockquote_close emitting redundant newline (inner content handles spacing)
- Render horizontal rules as visible ─── separator instead of silent drop
- Strip inner cell styles in code-mode tables to prevent overlapping with code_block span

Signal formatting fixes:
- Normalize URLs for dedup comparison (strip protocol, www., trailing slash)
- Render headings as bold text (headingStyle: 'bold')
- Add '> ' prefix to blockquotes for visual distinction
- Re-chunk after link expansion to respect chunk size limits

Tests:
- 51 new tests for markdown IR (spacing, lists, blockquotes, tables, HR)
- 18 new tests for Signal formatting (URL dedup, headings, blockquotes, HR, chunking)
- Update Slack nested list test expectation to match corrected IR output

* refactor: style-aware Signal text chunker

Replace indexOf-based chunk position tracking with deterministic
cursor tracking. The new splitSignalFormattedText:

- Splits at whitespace/newline boundaries within the limit
- Avoids breaking inside parentheses (preserves expanded link URLs)
- Slices style ranges at chunk boundaries with correct local offsets
- Tracks position via offset arithmetic instead of fragile indexOf

Removes dependency on chunkText from auto-reply/chunk.

Tests: 19 new tests covering style preservation across chunk boundaries,
edge cases (empty text, under limit, exact split points), and integration
with link expansion.

* fix: correct Signal style offsets with multiple link expansions

applyInsertionsToStyles() was using original coordinates for each
insertion without tracking cumulative shift from prior insertions.
This caused bold/italic/etc styles to drift to wrong text positions
when multiple markdown links expanded in a single message.

Added cumulative shift tracking and a regression test.

* test: clean up test noise and fix ineffective assertions

- Remove console.log from ir.list-spacing and ir.hr-spacing tests
- Fix ir.nested-lists.test.ts: remove ineffective regex assertion
- Fix ir.hr-spacing.test.ts: add actual assertions to edge case test

* refactor: split Signal formatting tests (#9781) (thanks @heyhudson)

---------

Co-authored-by: Hudson <258693705+hudson-rivera@users.noreply.github.com>
Co-authored-by: Peter Steinberger <steipete@gmail.com>
2026-02-14 16:57:20 +01:00

398 lines
10 KiB
TypeScript

import type { MarkdownTableMode } from "../config/types.base.js";
import {
chunkMarkdownIR,
markdownToIR,
type MarkdownIR,
type MarkdownStyle,
} from "../markdown/ir.js";
/** Style names that can be attached to a range of Signal message text. */
type SignalTextStyle = "BOLD" | "ITALIC" | "STRIKETHROUGH" | "MONOSPACE" | "SPOILER";
/**
 * A styled range expressed as start + length (string indices into the
 * accompanying `text`). This is the shape returned to callers.
 */
export type SignalTextStyleRange = {
start: number;
length: number;
style: SignalTextStyle;
};
/** Rendered message text together with the style ranges that apply to it. */
export type SignalFormattedText = {
text: string;
styles: SignalTextStyleRange[];
};
/** Options accepted by the exported converters; `tableMode` is forwarded to `markdownToIR`. */
type SignalMarkdownOptions = {
tableMode?: MarkdownTableMode;
};
/**
 * Internal styled range expressed as start + end. `end` is exclusive: the
 * renderer converts a span to a range with `length = end - start`.
 */
type SignalStyleSpan = {
start: number;
end: number;
style: SignalTextStyle;
};
/**
 * Records text inserted during link expansion: `pos` is the position in the
 * pre-expansion text (the renderer uses the link's end), `length` is the
 * number of characters inserted there.
 */
type Insertion = {
pos: number;
length: number;
};
/**
 * Reduce a URL (or URL-looking label) to a canonical form for equality checks.
 *
 * The input is lowercased, then a leading `http://` / `https://`, a leading
 * `www.`, and any run of trailing slashes are removed, in that order.
 *
 * @param url - Raw URL or label text.
 * @returns The canonical string; only meaningful for comparison, not for display.
 */
function normalizeUrlForComparison(url: string): string {
  // Order matters: the `www.` pattern is anchored, so the protocol has to go first.
  const noise: RegExp[] = [/^https?:\/\//, /^www\./, /\/+$/];
  return noise.reduce((value, pattern) => value.replace(pattern, ""), url.toLowerCase());
}
/**
 * Markdown IR style name -> Signal style name.
 * Inline code and fenced code blocks both render as MONOSPACE.
 */
const SIGNAL_STYLE_BY_MARKDOWN_STYLE: ReadonlyMap<string, SignalTextStyle> = new Map<
  string,
  SignalTextStyle
>([
  ["bold", "BOLD"],
  ["italic", "ITALIC"],
  ["strikethrough", "STRIKETHROUGH"],
  ["code", "MONOSPACE"],
  ["code_block", "MONOSPACE"],
  ["spoiler", "SPOILER"],
]);

/**
 * Translate a markdown IR style into its Signal counterpart.
 *
 * @param style - Style name taken from a markdown IR span.
 * @returns The Signal style, or `null` when the style has no Signal equivalent.
 */
function mapStyle(style: MarkdownStyle): SignalTextStyle | null {
  return SIGNAL_STYLE_BY_MARKDOWN_STYLE.get(style) ?? null;
}
/**
 * Sort style ranges and coalesce consecutive ranges of the same style that
 * overlap or touch.
 *
 * Ranges are ordered by start, then length, then style name. A range is folded
 * into the previously emitted one only when both carry the same style and the
 * new range starts at or before the previous range's end. The input array and
 * its elements are left untouched.
 *
 * @param styles - Ranges in any order.
 * @returns A new, sorted array of merged ranges.
 */
function mergeStyles(styles: SignalTextStyleRange[]): SignalTextStyleRange[] {
  const ordered = styles.toSorted((left, right) =>
    left.start !== right.start
      ? left.start - right.start
      : left.length !== right.length
        ? left.length - right.length
        : left.style.localeCompare(right.style),
  );

  const out: SignalTextStyleRange[] = [];
  for (const range of ordered) {
    const tail = out[out.length - 1];
    const extendsTail =
      Boolean(tail) && tail.style === range.style && range.start <= tail.start + tail.length;
    if (!extendsTail) {
      // Copy so later in-place extension never leaks into the caller's objects.
      out.push({ ...range });
      continue;
    }
    const tailEnd = tail.start + tail.length;
    const rangeEnd = range.start + range.length;
    tail.length = Math.max(tailEnd, rangeEnd) - tail.start;
  }
  return out;
}
/**
 * Restrict style ranges to the interval `[0, maxLength]`.
 *
 * The start is clamped into `[0, maxLength]`; the end is the original
 * `start + length` capped at `maxLength`. Ranges left with no positive length
 * are dropped.
 *
 * @param styles - Ranges to clamp.
 * @param maxLength - Upper bound, typically the length of the rendered text.
 * @returns New range objects in the original order.
 */
function clampStyles(styles: SignalTextStyleRange[], maxLength: number): SignalTextStyleRange[] {
  return styles.flatMap((range): SignalTextStyleRange[] => {
    const from = Math.max(0, Math.min(range.start, maxLength));
    const to = Math.min(range.start + range.length, maxLength);
    const span = to - from;
    return span > 0 ? [{ start: from, length: span, style: range.style }] : [];
  });
}
/**
 * Re-map style spans after text has been inserted into the string they index.
 *
 * Insertion positions refer to the text before any insertion. They are
 * processed in ascending position order while a running shift converts each
 * one into the coordinates of the already-updated spans. For each insertion:
 * - spans ending at or before the insertion point are kept as-is;
 * - spans starting at or after it are moved right by the inserted length;
 * - spans straddling it are split in two, so the inserted text is unstyled.
 *
 * @param spans - Style spans in pre-insertion coordinates.
 * @param insertions - Inserted segments (position + length), in any order.
 * @returns Updated spans; the very same `spans` array when there are no insertions.
 */
function applyInsertionsToStyles(
  spans: SignalStyleSpan[],
  insertions: Insertion[],
): SignalStyleSpan[] {
  if (insertions.length === 0) {
    return spans;
  }

  const ordered = insertions.toSorted((left, right) => left.pos - right.pos);
  let current = spans;
  let shift = 0;

  for (const { pos, length: inserted } of ordered) {
    // Translate the original-text position into current (already shifted) coordinates.
    const at = pos + shift;

    current = current.flatMap((span): SignalStyleSpan[] => {
      if (span.end <= at) {
        return [span];
      }
      if (span.start >= at) {
        return [{ start: span.start + inserted, end: span.end + inserted, style: span.style }];
      }
      if (!(span.start < at && span.end > at)) {
        // Offsets that cannot be ordered against the insertion point (e.g. NaN) are dropped.
        return [];
      }

      // Straddling span: keep the head in place, resume after the inserted text.
      const parts: SignalStyleSpan[] = [{ start: span.start, end: at, style: span.style }];
      const resumeStart = at + inserted;
      const resumeEnd = span.end + inserted;
      if (resumeEnd > resumeStart) {
        parts.push({ start: resumeStart, end: resumeEnd, style: span.style });
      }
      return parts;
    });

    shift += inserted;
  }

  return current;
}
/**
 * Render a markdown IR into Signal text plus style ranges.
 *
 * Links are visited in start order; a link that begins before the end of the
 * previously handled link is skipped. After each link label the target is made
 * visible:
 * - empty label: the href itself is appended;
 * - label that differs from the href after normalization (a `mailto:` prefix
 *   on the href is ignored for the comparison): ` (href)` is appended;
 * - otherwise nothing is appended.
 * Style spans are then shifted to account for the appended text, converted to
 * start/length ranges, clamped to the right-trimmed text and merged.
 *
 * @param ir - Markdown IR providing `text`, `links` and `styles`.
 * @returns Right-trimmed text and its style ranges; empty result for empty text.
 */
function renderSignalText(ir: MarkdownIR): SignalFormattedText {
  const source = ir.text ?? "";
  if (!source) {
    return { text: "", styles: [] };
  }

  // Text to append after a link label ("" when the label already says it all).
  const linkSuffix = (rawLabel: string, rawHref: string): string => {
    const href = rawHref.trim();
    if (!href) {
      return "";
    }
    const label = rawLabel.trim();
    if (!label) {
      return href;
    }
    const comparable = href.startsWith("mailto:") ? href.slice("mailto:".length) : href;
    const redundant = normalizeUrlForComparison(label) === normalizeUrlForComparison(comparable);
    return redundant ? "" : ` (${href})`;
  };

  const pieces: string[] = [];
  const insertions: Insertion[] = [];
  let consumed = 0;
  const linksInOrder = [...ir.links].toSorted((left, right) => left.start - right.start);
  for (const link of linksInOrder) {
    if (link.start < consumed) {
      // Overlaps a link that was already emitted.
      continue;
    }
    pieces.push(source.slice(consumed, link.end));
    const suffix = linkSuffix(source.slice(link.start, link.end), link.href);
    if (suffix) {
      pieces.push(suffix);
      // Positions are recorded in source coordinates; the shift is applied later.
      insertions.push({ pos: link.end, length: suffix.length });
    }
    consumed = link.end;
  }
  pieces.push(source.slice(consumed));

  const spans: SignalStyleSpan[] = [];
  for (const { start, end, style } of ir.styles) {
    const signalStyle = mapStyle(style);
    if (signalStyle) {
      spans.push({ start, end, style: signalStyle });
    }
  }

  const finalText = pieces.join("").trimEnd();
  const ranges = applyInsertionsToStyles(spans, insertions).map(({ start, end, style }) => ({
    start,
    length: end - start,
    style,
  }));

  return {
    text: finalText,
    styles: mergeStyles(clampStyles(ranges, finalText.length)),
  };
}
/**
 * Convert markdown into a single Signal message (text + style ranges).
 *
 * The markdown is parsed with linkify and spoilers enabled, headings rendered
 * as bold, and blockquotes prefixed with "> ".
 *
 * @param markdown - Markdown source; a nullish value is treated as "".
 * @param options - Optional settings; `tableMode` is passed through to the parser.
 * @returns The rendered text and its styles.
 */
export function markdownToSignalText(
  markdown: string,
  options: SignalMarkdownOptions = {},
): SignalFormattedText {
  const { tableMode } = options;
  return renderSignalText(
    markdownToIR(markdown ?? "", {
      linkify: true,
      enableSpoilers: true,
      headingStyle: "bold",
      blockquotePrefix: "> ",
      tableMode,
    }),
  );
}
/**
 * Extract the parts of style ranges that fall inside `[start, end)` and
 * re-base them so that `start` becomes offset 0.
 *
 * @param styles - Ranges in the coordinates of the full text.
 * @param start - Inclusive start of the slice.
 * @param end - Exclusive end of the slice.
 * @returns The non-empty intersections, in input order, relative to `start`.
 */
function sliceSignalStyles(
  styles: SignalTextStyleRange[],
  start: number,
  end: number,
): SignalTextStyleRange[] {
  return styles.flatMap((range): SignalTextStyleRange[] => {
    const from = Math.max(range.start, start);
    const to = Math.min(range.start + range.length, end);
    return to > from ? [{ start: from - start, length: to - from, style: range.style }] : [];
  });
}
/**
 * Split Signal formatted text into chunks no longer than `limit` while
 * preserving styles.
 *
 * The cursor is tracked by offset arithmetic against the original text rather
 * than by searching for chunk text, which is fragile when chunks are trimmed
 * or when duplicate substrings exist. Styles spanning a chunk boundary are
 * split into separate ranges, each relative to its own chunk. Whitespace at
 * chunk boundaries is dropped because chunks are sent as separate messages.
 *
 * Break points come from `findBreakIndex`; when it offers none, the text is
 * hard-broken at the limit, backing off one code unit if that would cut a
 * UTF-16 surrogate pair in half.
 *
 * @param formatted - Text and style ranges (offsets relative to `formatted.text`).
 * @param limit - Maximum chunk length in UTF-16 code units. Fractions are
 *   floored. A limit below 1 (or NaN) cannot be honored, so the input is
 *   returned unsplit instead of looping without making progress.
 * @returns The chunks in order; `[formatted]` itself when no split is needed.
 */
function splitSignalFormattedText(
  formatted: SignalFormattedText,
  limit: number,
): SignalFormattedText[] {
  const { text, styles } = formatted;
  const maxLength = Math.floor(limit);
  // `!(x >= 1)` also catches NaN. Without this guard a zero-width window never
  // advances `remaining` and the loop below would spin forever.
  if (!(maxLength >= 1) || text.length <= maxLength) {
    return [formatted];
  }

  const results: SignalFormattedText[] = [];
  let remaining = text;
  let offset = 0; // Position of `remaining` within `text`, used for style slicing.

  while (remaining.length > 0) {
    if (remaining.length <= maxLength) {
      // Last chunk: take everything that is left.
      const tail = remaining.trimEnd();
      if (tail.length > 0) {
        results.push({
          text: tail,
          styles: mergeStyles(sliceSignalStyles(styles, offset, offset + tail.length)),
        });
      }
      break;
    }

    // Look for a natural break point inside the window.
    let breakIdx = findBreakIndex(remaining.slice(0, maxLength));
    if (breakIdx <= 0) {
      // No usable break point: hard break at the limit.
      breakIdx = maxLength;
      const before = remaining.charCodeAt(breakIdx - 1);
      const after = remaining.charCodeAt(breakIdx);
      const splitsSurrogatePair =
        before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
      // Only back off when the chunk stays non-empty, so progress is guaranteed.
      if (splitsSurrogatePair && breakIdx > 1) {
        breakIdx -= 1;
      }
    }

    const chunk = remaining.slice(0, breakIdx).trimEnd();
    if (chunk.length > 0) {
      results.push({
        text: chunk,
        styles: mergeStyles(sliceSignalStyles(styles, offset, offset + chunk.length)),
      });
    }

    // Skip the separator we broke on, plus any further leading whitespace.
    const brokeOnWhitespace =
      breakIdx < remaining.length && /\s/.test(remaining.charAt(breakIdx));
    const nextStart = Math.min(remaining.length, breakIdx + (brokeOnWhitespace ? 1 : 0));
    remaining = remaining.slice(nextStart).trimStart();
    // `remaining` is always a suffix of `text`, so this keeps `offset` in sync
    // with every character dropped above.
    offset = text.length - remaining.length;
  }

  return results;
}
/**
 * Locate the preferred split position inside a window of text.
 *
 * Characters are scanned left to right while tracking parenthesis nesting;
 * only positions outside parentheses qualify. The last qualifying newline is
 * returned when it is past index 0; otherwise the last qualifying non-newline
 * whitespace character is returned.
 *
 * @param window - The candidate text, already cut to the size limit.
 * @returns Index of the break character, or -1 when nothing qualifies.
 */
function findBreakIndex(window: string): number {
  let newlineAt = -1;
  let whitespaceAt = -1;
  let depth = 0;

  for (let idx = 0; idx < window.length; idx += 1) {
    const ch = window.charAt(idx);
    if (ch === "(") {
      depth += 1;
    } else if (ch === ")" && depth > 0) {
      depth -= 1;
    } else if (depth === 0) {
      // An unmatched ")" also lands here but is neither newline nor whitespace.
      if (ch === "\n") {
        newlineAt = idx;
      } else if (/\s/.test(ch)) {
        whitespaceAt = idx;
      }
    }
  }

  if (newlineAt > 0) {
    return newlineAt;
  }
  return whitespaceAt;
}
/**
 * Convert markdown into one or more Signal messages that respect a size limit.
 *
 * The markdown is parsed with the same settings as the single-message
 * conversion (linkify, spoilers, bold headings, "> " blockquote prefix), the
 * IR is chunked by `chunkMarkdownIR`, and every chunk is rendered on its own.
 * Rendering can lengthen a chunk because link targets are appended, so any
 * rendered chunk longer than `limit` is split again.
 *
 * @param markdown - Markdown source; a nullish value is treated as "".
 * @param limit - Maximum length of each message's text.
 * @param options - Optional settings; `tableMode` is passed through to the parser.
 * @returns Rendered chunks in document order.
 */
export function markdownToSignalTextChunks(
  markdown: string,
  limit: number,
  options: SignalMarkdownOptions = {},
): SignalFormattedText[] {
  const { tableMode } = options;
  const ir = markdownToIR(markdown ?? "", {
    linkify: true,
    enableSpoilers: true,
    headingStyle: "bold",
    blockquotePrefix: "> ",
    tableMode,
  });

  const output: SignalFormattedText[] = [];
  for (const piece of chunkMarkdownIR(ir, limit)) {
    const rendered = renderSignalText(piece);
    // Link expansion may have pushed the rendered text past the limit.
    const fitted =
      rendered.text.length > limit ? splitSignalFormattedText(rendered, limit) : [rendered];
    output.push(...fitted);
  }
  return output;
}