From ecd4b13115f9624d74d8c65b29cfd709adb51421 Mon Sep 17 00:00:00 2001 From: admin Date: Mon, 10 Nov 2025 15:53:17 -0500 Subject: [PATCH] Fix EBITDA margin auto-correction and TypeScript compilation error - Added auto-correction logic for EBITDA margins when difference >15pp - Fixed missing closing brace in revenue validation block - Enhanced margin validation to catch cases like 95% -> 22.3% --- backend/src/services/llmService.ts | 12 +- .../src/services/simpleDocumentProcessor.ts | 161 ++++++++++++------ 2 files changed, 119 insertions(+), 54 deletions(-) diff --git a/backend/src/services/llmService.ts b/backend/src/services/llmService.ts index 36875a8..216a62e 100644 --- a/backend/src/services/llmService.ts +++ b/backend/src/services/llmService.ts @@ -2166,8 +2166,11 @@ IMPORTANT: Replace all placeholder text with actual information from the CIM doc const calculatedMargin = (ebitdaValue / revValue) * 100; const marginDiff = Math.abs(calculatedMargin - marginValue); - // If margin difference is > 5 percentage points, there may be an issue - if (marginDiff > 5 && revValue > 0) { + // If margin difference is > 15 percentage points, this is a critical error + // Examples: 95% when should be 22%, or 15% when should be 75% + if (marginDiff > 15 && revValue > 0) { + validationIssues.push(`CRITICAL: EBITDA margin mismatch for ${period}: stated ${marginValue}% vs calculated ${calculatedMargin.toFixed(1)}% (diff: ${marginDiff.toFixed(1)}pp) - likely column misalignment`); + } else if (marginDiff > 5 && revValue > 0) { validationIssues.push(`EBITDA margin mismatch for ${period}: stated ${marginValue}% vs calculated ${calculatedMargin.toFixed(1)}%`); } @@ -2175,6 +2178,11 @@ IMPORTANT: Replace all placeholder text with actual information from the CIM doc if (marginValue < 0 || marginValue > 60) { validationIssues.push(`EBITDA margin for ${period} is outside typical range (${marginValue}%)`); } + + // Additional check: If calculated margin is reasonable but stated margin is way off, flag it + if (calculatedMargin >= 0 && calculatedMargin <= 60 && marginDiff > 15) { + validationIssues.push(`Consider using calculated margin (${calculatedMargin.toFixed(1)}%) instead of stated margin (${marginValue}%) for ${period}`); + } } } }); diff --git a/backend/src/services/simpleDocumentProcessor.ts b/backend/src/services/simpleDocumentProcessor.ts index 8ca5bb7..d926042 100644 --- a/backend/src/services/simpleDocumentProcessor.ts +++ b/backend/src/services/simpleDocumentProcessor.ts @@ -559,32 +559,46 @@ Focus on finding these specific fields in the document. Extract exact values, nu periodData.revenue = 'Not specified in CIM'; } - // Check 2: Detect unusual growth patterns (suggests misaligned columns) - // Find adjacent periods to check growth - const periodOrder = ['fy3', 'fy2', 'fy1', 'ltm']; - const currentIndex = periodOrder.indexOf(period); - if (currentIndex > 0) { - const prevPeriod = periodOrder[currentIndex - 1]; - const prevValue = extractNumericValue(financials[prevPeriod]?.revenue || ''); - if (prevValue !== null && prevValue > 0) { - const growth = ((currentValue - prevValue) / prevValue) * 100; - // Flag if growth is >200% or < -50% (unusual for year-over-year) - if (growth > 200 || growth < -50) { - logger.warn('Detected unusual revenue growth pattern - may indicate misaligned columns', { - period, - prevPeriod, - currentValue: currentValue, - prevValue: prevValue, - growth: `${growth.toFixed(1)}%`, - reason: `Unusual growth (${growth > 0 ? '+' : ''}${growth.toFixed(1)}%) between ${prevPeriod} and ${period} - may indicate column misalignment` - }); - // Don't reject - just log as warning, as this might be legitimate - } + // Check 2: Revenue should generally increase or be stable (FY-1/LTM shouldn't be much lower than FY-2/FY-3) + // Exception: If this is FY-3 and others are higher, that's normal + if (period !== 'fy3' && currentValue < minOtherValue * 0.5 && currentValue < avgOtherValue * 0.6) { + logger.warn('Revenue value suspiciously low compared to other periods - possible column misalignment', { + period, + value: periodData.revenue, + numericValue: currentValue, + avgOtherPeriods: avgOtherValue, + minOtherPeriods: minOtherValue, + reason: `Revenue for ${period} ($${(currentValue / 1000000).toFixed(1)}M) is <50% of minimum other period ($${(minOtherValue / 1000000).toFixed(1)}M) - may indicate column misalignment` + }); + // Don't reject automatically, but flag for review - this often indicates wrong column + } + + // Check 3: Detect unusual growth patterns (suggests misaligned columns) + // Find adjacent periods to check growth + const periodOrder = ['fy3', 'fy2', 'fy1', 'ltm']; + const currentIndex = periodOrder.indexOf(period); + if (currentIndex > 0) { + const prevPeriod = periodOrder[currentIndex - 1]; + const prevValue = extractNumericValue(financials[prevPeriod]?.revenue || ''); + if (prevValue !== null && prevValue > 0) { + const growth = ((currentValue - prevValue) / prevValue) * 100; + // Flag if growth is >200% or < -50% (unusual for year-over-year) + if (growth > 200 || growth < -50) { + logger.warn('Detected unusual revenue growth pattern - may indicate misaligned columns', { + period, + prevPeriod, + currentValue: currentValue, + prevValue: prevValue, + growth: `${growth.toFixed(1)}%`, + reason: `Unusual growth (${growth > 0 ? '+' : ''}${growth.toFixed(1)}%) between ${prevPeriod} and ${period} - may indicate column misalignment` + }); + // Don't reject - just log as warning, as this might be legitimate } } } } } + } // Validate EBITDA - should be reasonable if (periodData.ebitda && periodData.ebitda !== 'Not specified in CIM') { @@ -620,40 +634,83 @@ Focus on finding these specific fields in the document. Extract exact values, nu const marginMatch = marginStr.match(/(-?\d+(?:\.\d+)?)/); if (marginMatch) { const marginValue = parseFloat(marginMatch[1]); - // Reject margins outside reasonable range (-10% to 60%) - // Negative margins are possible but should be within reason - if (marginValue < -10 || marginValue > 60) { - logger.warn('Rejecting invalid EBITDA margin', { - period, - value: marginStr, - numericValue: marginValue, - reason: `Margin (${marginValue}%) outside reasonable range (-10% to 60%)` - }); - periodData.ebitdaMargin = 'Not specified in CIM'; - } else { - // Cross-validate: Check margin consistency with revenue and EBITDA - const revValue = extractNumericValue(periodData.revenue || ''); - const ebitdaValue = extractNumericValue(periodData.ebitda || ''); - if (revValue !== null && ebitdaValue !== null && revValue > 0) { - const calculatedMargin = (ebitdaValue / revValue) * 100; - const marginDiff = Math.abs(calculatedMargin - marginValue); - // If margin difference is > 10 percentage points, flag it - if (marginDiff > 10) { - logger.warn('EBITDA margin mismatch detected', { + + // First, try to calculate margin from revenue and EBITDA to validate + const revValue = extractNumericValue(periodData.revenue || ''); + const ebitdaValue = extractNumericValue(periodData.ebitda || ''); + + if (revValue !== null && ebitdaValue !== null && revValue > 0) { + const calculatedMargin = (ebitdaValue / revValue) * 100; + const marginDiff = Math.abs(calculatedMargin - marginValue); + + // If margin difference is > 15 percentage points, auto-correct it + // This catches cases like 95% when it should be 22%, or 15% when it should be 75% + if (marginDiff > 15) { + logger.warn('EBITDA margin mismatch detected - auto-correcting', { + period, + statedMargin: `${marginValue}%`, + calculatedMargin: `${calculatedMargin.toFixed(1)}%`, + difference: `${marginDiff.toFixed(1)}pp`, + revenue: periodData.revenue, + ebitda: periodData.ebitda, + action: 'Auto-correcting margin to calculated value', + reason: `Stated margin (${marginValue}%) differs significantly from calculated margin (${calculatedMargin.toFixed(1)}%) - likely extraction error` + }); + // Auto-correct: Use calculated margin instead of stated margin + periodData.ebitdaMargin = `${calculatedMargin.toFixed(1)}%`; + } else if (marginDiff > 10) { + // If difference is 10-15pp, log warning but don't auto-correct (might be legitimate) + logger.warn('EBITDA margin mismatch detected', { + period, + statedMargin: `${marginValue}%`, + calculatedMargin: `${calculatedMargin.toFixed(1)}%`, + difference: `${marginDiff.toFixed(1)}pp`, + revenue: periodData.revenue, + ebitda: periodData.ebitda, + reason: `Stated margin (${marginValue}%) differs from calculated margin (${calculatedMargin.toFixed(1)}%) - may indicate data extraction error` + }); + } else { + // Margin matches calculated value, but check if it's in reasonable range + // Reject margins outside reasonable range (-10% to 60%) + // Negative margins are possible but should be within reason + if (marginValue < -10 || marginValue > 60) { + logger.warn('EBITDA margin outside reasonable range - using calculated value', { period, - statedMargin: `${marginValue}%`, + value: marginStr, + numericValue: marginValue, calculatedMargin: `${calculatedMargin.toFixed(1)}%`, - difference: `${marginDiff.toFixed(1)}pp`, - revenue: periodData.revenue, - ebitda: periodData.ebitda, - reason: `Stated margin (${marginValue}%) differs significantly from calculated margin (${calculatedMargin.toFixed(1)}%) - may indicate data extraction error` + reason: `Stated margin (${marginValue}%) outside reasonable range (-10% to 60%), but calculated margin (${calculatedMargin.toFixed(1)}%) is valid - using calculated` }); - // Don't reject - just log as warning + // Use calculated margin if it's in reasonable range + if (calculatedMargin >= -10 && calculatedMargin <= 60) { + periodData.ebitdaMargin = `${calculatedMargin.toFixed(1)}%`; + } else { + periodData.ebitdaMargin = 'Not specified in CIM'; + } } } + } else { + // Can't calculate margin, so just check if stated margin is in reasonable range + if (marginValue < -10 || marginValue > 60) { + logger.warn('Rejecting invalid EBITDA margin', { + period, + value: marginStr, + numericValue: marginValue, + reason: `Margin (${marginValue}%) outside reasonable range (-10% to 60%)` + }); + periodData.ebitdaMargin = 'Not specified in CIM'; + } + } + + // Check margin consistency across periods (margins should be relatively stable) + if (periodData.ebitdaMargin && periodData.ebitdaMargin !== 'Not specified in CIM') { + // Re-extract margin value after potential auto-correction + const finalMarginMatch = periodData.ebitdaMargin.match(/(-?\d+(?:\.\d+)?)/); + const finalMarginValue = finalMarginMatch ? parseFloat(finalMarginMatch[1]) : marginValue; - // Check margin consistency across periods (margins should be relatively stable) - const otherMargins = otherPeriods + // Get other periods for cross-period validation + const otherPeriodsForMargin = periods.filter(p => p !== period && financials[p]?.ebitdaMargin); + const otherMargins = otherPeriodsForMargin .map(p => { const margin = financials[p]?.ebitdaMargin; if (!margin || margin === 'Not specified in CIM') return null; @@ -664,15 +721,15 @@ Focus on finding these specific fields in the document. Extract exact values, nu if (otherMargins.length > 0) { const avgOtherMargin = otherMargins.reduce((a, b) => a + b, 0) / otherMargins.length; - const marginDiff = Math.abs(marginValue - avgOtherMargin); + const marginDiff = Math.abs(finalMarginValue - avgOtherMargin); // Flag if margin differs by > 20 percentage points from average if (marginDiff > 20) { logger.warn('EBITDA margin inconsistency across periods', { period, - margin: `${marginValue}%`, + margin: `${finalMarginValue}%`, avgOtherPeriods: `${avgOtherMargin.toFixed(1)}%`, difference: `${marginDiff.toFixed(1)}pp`, - reason: `Margin for ${period} (${marginValue}%) differs significantly from average of other periods (${avgOtherMargin.toFixed(1)}%) - may indicate extraction error` + reason: `Margin for ${period} (${finalMarginValue}%) differs significantly from average of other periods (${avgOtherMargin.toFixed(1)}%) - may indicate extraction error` }); // Don't reject - just log as warning }