fix: improve section extraction robustness (case-insensitive, H3, code blocks)

2026-02-16 20:48:24 +08:00
parent 90476d465d
commit d0b33f23eb
2 changed files with 116 additions and 15 deletions
--- a/src/auto-reply/reply/post-compaction-context.test.ts
+++ b/src/auto-reply/reply/post-compaction-context.test.ts
@@ -96,4 +96,74 @@ Ignore this.
    expect(result).not.toBeNull();
    expect(result).toContain("[truncated]");
  });
+
+  it("matches section names case-insensitively", async () => {
+    const content = `# Rules
+
+## session startup
+
+Read WORKFLOW_AUTO.md
+
+## Other
+`;
+    fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
+    const result = await readPostCompactionContext(tmpDir);
+    expect(result).not.toBeNull();
+    expect(result).toContain("WORKFLOW_AUTO.md");
+  });
+
+  it("matches H3 headings", async () => {
+    const content = `# Rules
+
+### Session Startup
+
+Read these files.
+
+### Other
+`;
+    fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
+    const result = await readPostCompactionContext(tmpDir);
+    expect(result).not.toBeNull();
+    expect(result).toContain("Read these files");
+  });
+
+  it("skips sections inside code blocks", async () => {
+    const content = `# Rules
+
+\`\`\`markdown
+## Session Startup
+This is inside a code block and should NOT be extracted.
+\`\`\`
+
+## Red Lines
+
+Real red lines here.
+
+## Other
+`;
+    fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
+    const result = await readPostCompactionContext(tmpDir);
+    expect(result).not.toBeNull();
+    expect(result).toContain("Real red lines here");
+    expect(result).not.toContain("inside a code block");
+  });
+
+  it("includes sub-headings within a section", async () => {
+    const content = `## Red Lines
+
+### Rule 1
+Never do X.
+
+### Rule 2
+Never do Y.
+
+## Other Section
+`;
+    fs.writeFileSync(path.join(tmpDir, "AGENTS.md"), content);
+    const result = await readPostCompactionContext(tmpDir);
+    expect(result).not.toBeNull();
+    expect(result).toContain("Rule 1");
+    expect(result).toContain("Rule 2");
+    expect(result).not.toContain("Other Section");
+  });
 });
--- a/src/auto-reply/reply/post-compaction-context.ts
+++ b/src/auto-reply/reply/post-compaction-context.ts
@@ -44,8 +44,10 @@ export async function readPostCompactionContext(workspaceDir: string): Promise<s
 }

 /**
- * Extract named H2 sections from markdown content.
- * Matches "## SectionName" and captures until the next "## " or end of string.
+ * Extract named sections from markdown content.
+ * Matches H2 (##) or H3 (###) headings case-insensitively.
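+ * Heading detection is skipped inside fenced code blocks (fenced lines within a matched section are kept).
+ * Captures until the next H2/H3 heading of same or higher level, or end of string; H1 and H4+ lines are kept as content.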
 */
 function extractSections(content: string, sectionNames: string[]): string[] {
  const results: string[] = [];
@@ -54,21 +56,54 @@ function extractSections(content: string, sectionNames: string[]): string[] {
  for (const name of sectionNames) {
    let sectionLines: string[] = [];
    let inSection = false;
+    let sectionLevel = 0;
+    let inCodeBlock = false;

    for (const line of lines) {
-      // Check if this is the start of our target section
-      if (line.match(new RegExp(`^##\\s+${escapeRegExp(name)}\\s*$`))) {
-        inSection = true;
-        sectionLines = [line];
+      // Track fenced code blocks
+      if (line.trimStart().startsWith("```")) {
+        inCodeBlock = !inCodeBlock;
+        if (inSection) {
+          sectionLines.push(line);
+        }
        continue;
      }

-      // If we're in the section, check if we've hit another H2 heading
-      if (inSection) {
-        if (line.match(/^##\s+/)) {
-          // Hit another H2 heading, stop collecting
-          break;
+      // Skip heading detection inside code blocks
+      if (inCodeBlock) {
+        if (inSection) {
+          sectionLines.push(line);
        }
+        continue;
+      }
+
+      // Check if this line is a heading
+      const headingMatch = line.match(/^(#{2,3})\s+(.+?)\s*$/);
+
+      if (headingMatch) {
+        const level = headingMatch[1].length; // 2 or 3
+        const headingText = headingMatch[2];
+
+        if (!inSection) {
+          // Check if this is our target section (case-insensitive)
+          if (headingText.toLowerCase() === name.toLowerCase()) {
+            inSection = true;
+            sectionLevel = level;
+            sectionLines = [line];
+            continue;
+          }
+        } else {
+          // We're in section — stop if we hit a heading of same or higher level
+          if (level <= sectionLevel) {
+            break;
+          }
+          // Lower-level heading (e.g., ### inside ##) — include it
+          sectionLines.push(line);
+          continue;
+        }
+      }
+
+      if (inSection) {
        sectionLines.push(line);
      }
    }
@@ -80,7 +115,3 @@ function extractSections(content: string, sectionNames: string[]): string[] {

  return results;
 }
-
-function escapeRegExp(str: string): string {
-  return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
-}