/*
 * Files
 * cim_summary/backend/src/services/simpleDocumentProcessor.ts
 * admin ecd4b13115 Fix EBITDA margin auto-correction and TypeScript compilation error
 * - Added auto-correction logic for EBITDA margins when difference >15pp
 * - Fixed missing closing brace in revenue validation block
 * - Enhanced margin validation to catch cases like 95% -> 22.3%
 * 2025-11-10 15:53:17 -05:00
 *
 * 789 lines
 * 30 KiB
 * TypeScript
 */

import { logger } from '../utils/logger';
import { config } from '../config/env';
import { documentAiProcessor } from './documentAiProcessor';
import { llmService } from './llmService';
import { CIMReview } from './llmSchemas';
import { cimReviewSchema } from './llmSchemas';
import { defaultCIMReview } from './unifiedDocumentProcessor';
/**
 * Outcome of `SimpleDocumentProcessor.processDocument`.
 * The same shape is returned on success and on failure; `success` tells them apart.
 */
interface ProcessingResult {
// True when the pipeline ran to completion; false when any step threw.
success: boolean;
// One-line summary built from the analysis data; empty string on failure.
summary: string;
// Extracted review data; on failure this is `defaultCIMReview`.
analysisData: CIMReview;
// Identifies which processing strategy produced this result.
processingStrategy: 'simple_full_document';
// Wall-clock duration in milliseconds (difference of two `Date.now()` readings).
processingTime: number;
// Number of LLM service calls counted during the run.
apiCalls: number;
// Error message on failure; `undefined` on success.
error: string | undefined;
}
/**
 * Simple Document Processor
 *
 * Strategy: extract the full text and send the entire document to the LLM
 * instead of chunking it for RAG.
 * - Deterministic pre-pass: `parseFinancialsFromText` looks for structured
 *   financial data (best effort; a failure is logged and ignored).
 * - Focused financial pass: `llmService.processFinancialsOnly` (best effort).
 * - Pass 1: full extraction with the comprehensive template prompt.
 * - Pass 2 (only when completeness < 90%): gap-filling for missing fields.
 *
 * A run therefore makes up to three LLM calls. Extracted financial values are
 * sanity-checked by `validateAndFixFinancialData` before they are scored.
 *
 * This is simpler, faster, and more reliable than complex RAG chunking.
 */
class SimpleDocumentProcessor {
  /** Placeholder string used throughout for a value that is absent from the CIM. */
  private static readonly NOT_SPECIFIED = 'Not specified in CIM';

  /** Path segments that are never written through, to prevent prototype pollution. */
  private static readonly UNSAFE_PATH_KEYS: ReadonlySet<string> = new Set([
    '__proto__',
    'constructor',
    'prototype'
  ]);

  /** BPCP internal fields (not in CIM); excluded from completeness scoring. */
  private static readonly BPCP_INTERNAL_FIELDS: readonly string[] = [
    'dealOverview.reviewers',
    'dealOverview.dateReviewed',
    'dealOverview.dateCIMReceived'
  ];

  /** Optional fields (allowed to be empty without hurting completeness). */
  private static readonly OPTIONAL_FIELDS: readonly string[] = [
    'dealOverview.transactionType',
    'dealOverview.statedReasonForSale',
    'businessDescription.customerBaseOverview.customerConcentrationRisk',
    'businessDescription.customerBaseOverview.typicalContractLength'
  ];

  /** Fields exempt from the 10-character minimum-length check. */
  private static readonly SHORT_FIELDS: readonly string[] = ['dealOverview.cimPageCount'];

  /** Known-bad shapes for extracted revenue / EBITDA strings. */
  private static readonly INVALID_AMOUNT_PATTERNS: readonly RegExp[] = [
    /^\$?3\.?0?0?$/, // "$3", "$3.00", "3"
    /^\$?10\.?0?0?$/, // "$10", "10" (too small)
    /^-\d+M$/, // "-25M", "-5M"
    // The minus sign is mandatory here: with an optional "-" this pattern
    // also matched every valid value such as "$64M".
    /^\$-\d+M$/, // "$-25M", "$-5M"
    /^\$?\d{1,2}$/ // Single or double digit dollar amounts (too small)
  ];

  /**
   * Process a document using the simple full-document approach.
   *
   * @param documentId Identifier used for logging and passed to text extraction.
   * @param userId Passed through to Document AI text extraction.
   * @param text Already-extracted document text. When empty or absent, the text
   *   is extracted from `options.fileBuffer` instead.
   * @param options Must carry `fileBuffer`, `fileName` and `mimeType` when `text`
   *   is empty; otherwise unused.
   * @returns A `ProcessingResult`. This method never rejects: any error is
   *   logged and reported through `success: false` and `error`.
   */
  async processDocument(
    documentId: string,
    userId: string,
    text: string,
    options: any = {}
  ): Promise<ProcessingResult> {
    const startTime = Date.now();
    let apiCalls = 0;
    // An explicit `null` bypasses the parameter default, so normalize here.
    const opts = options ?? {};

    try {
      logger.info('Simple processor: Starting', {
        documentId,
        textProvided: !!text && text.length > 0,
        // `text` may legitimately be absent when a file buffer is supplied.
        textLength: text?.length ?? 0,
        hasFileBuffer: !!opts.fileBuffer,
        hasFileName: !!opts.fileName
      });

      // Step 1: Extract text if not provided
      const extractedText = await this.resolveDocumentText(documentId, userId, text, opts);

      // Step 2: Run deterministic parser first
      const deterministicFinancials = await this.runDeterministicFinancialParser(
        documentId,
        extractedText
      );

      // Step 3: Financial extraction (focused prompt)
      logger.info('Step 3: Focused financial extraction', {
        documentId,
        hasParserResults: !!deterministicFinancials
      });
      let financialData: CIMReview['financialSummary'] | null = null;
      try {
        const financialResult = await llmService.processFinancialsOnly(
          extractedText,
          deterministicFinancials || undefined
        );
        apiCalls += 1;
        if (financialResult.success && financialResult.jsonOutput?.financialSummary) {
          financialData = financialResult.jsonOutput.financialSummary;
          logger.info('Financial extraction completed successfully', {
            documentId,
            hasFinancials: !!financialData.financials
          });
        } else {
          logger.warn('Financial extraction failed, will try in main extraction', {
            documentId,
            error: financialResult.error
          });
        }
      } catch (financialError) {
        // Best effort: pass 1 still extracts financials as part of the full template.
        logger.warn('Financial extraction threw error, will try in main extraction', {
          documentId,
          error: financialError instanceof Error ? financialError.message : String(financialError)
        });
      }

      // Step 4: Pass 1 - Full extraction with entire document
      logger.info('Step 4: Full document extraction (excluding financials if already extracted)', {
        documentId,
        textLength: extractedText.length,
        estimatedTokens: Math.ceil(extractedText.length / 4),
        hasFinancialData: !!financialData
      });
      const pass1Result = await llmService.processCIMDocument(
        extractedText,
        'BPCP CIM Review Template'
      );
      apiCalls += 1;
      if (!pass1Result.success || !pass1Result.jsonOutput) {
        throw new Error(`Pass 1 extraction failed: ${pass1Result.error || 'Unknown error'}`);
      }
      let analysisData = pass1Result.jsonOutput as CIMReview;

      // Merge financial data if we extracted it separately
      if (financialData) {
        analysisData.financialSummary = financialData;
        logger.info('Merged financial data from focused extraction', { documentId });
      }

      // Reject or auto-correct obviously wrong financial values before scoring,
      // so that rejected values show up as gaps for pass 2 to fill.
      analysisData = this.validateAndFixFinancialData(analysisData);

      // Step 5: Validate and identify missing fields
      let validation = this.validateData(analysisData);
      logger.info('Pass 1 validation completed', {
        documentId,
        completeness: validation.completenessScore.toFixed(1) + '%',
        emptyFields: validation.emptyFields.length,
        totalFields: validation.totalFields,
        filledFields: validation.filledFields
      });

      // Step 6: Pass 2 - Gap-filling if completeness < 90%
      if (validation.completenessScore < 90 && validation.emptyFields.length > 0) {
        logger.info('Pass 2: Gap-filling for missing fields', {
          documentId,
          missingFields: validation.emptyFields.length,
          sampleFields: validation.emptyFields.slice(0, 5)
        });

        // Create focused prompt for (at most 20) missing fields
        const focusedFields = validation.emptyFields.slice(0, 20);
        const gapFillPrompt = [
          'The following fields are missing or incomplete. Please extract them from the document:',
          focusedFields.join(', '),
          'Focus on finding these specific fields in the document. Extract exact values, numbers, and details.'
        ].join('\n');

        const pass2Result = await llmService.processCIMDocument(
          extractedText,
          'BPCP CIM Review Template',
          analysisData,
          focusedFields, // focusedFields
          gapFillPrompt // extractionInstructions
        );
        apiCalls += 1;

        if (pass2Result.success && pass2Result.jsonOutput) {
          // Merge pass 2 results into pass 1, preferring pass 2 values for missing fields
          analysisData = this.mergeResults(
            analysisData,
            pass2Result.jsonOutput as CIMReview,
            validation.emptyFields
          );
          // Gap-filling may have re-introduced financial values; sanitize again.
          analysisData = this.validateAndFixFinancialData(analysisData);
          // Re-validate
          validation = this.validateData(analysisData);
          logger.info('Pass 2 validation completed', {
            documentId,
            completeness: validation.completenessScore.toFixed(1) + '%',
            emptyFields: validation.emptyFields.length
          });
        }
      }

      // Step 7: Generate summary
      const summary = this.generateSummary(analysisData);

      // Step 8: Final report. `validation` already reflects the final data.
      const processingTime = Date.now() - startTime;
      logger.info('Simple processing completed', {
        documentId,
        completeness: validation.completenessScore.toFixed(1) + '%',
        totalFields: validation.totalFields,
        filledFields: validation.filledFields,
        emptyFields: validation.emptyFields.length,
        apiCalls,
        processingTimeMs: processingTime
      });

      return {
        success: true,
        summary,
        analysisData,
        processingStrategy: 'simple_full_document',
        processingTime,
        apiCalls,
        error: undefined
      };
    } catch (error) {
      const processingTime = Date.now() - startTime;
      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
      logger.error('Simple processing failed', {
        documentId,
        error: errorMessage,
        processingTimeMs: processingTime
      });
      return {
        success: false,
        summary: '',
        analysisData: defaultCIMReview,
        processingStrategy: 'simple_full_document',
        processingTime,
        apiCalls,
        error: errorMessage
      };
    }
  }

  /**
   * Return `text` when it is non-empty; otherwise extract the text from the
   * file described by `options` via Document AI.
   *
   * @throws Error when the file options are incomplete or extraction yields no text.
   */
  private async resolveDocumentText(
    documentId: string,
    userId: string,
    text: string,
    options: any
  ): Promise<string> {
    if (text && text.length > 0) {
      return text;
    }
    const { fileBuffer, fileName, mimeType } = options;
    if (!fileBuffer || !fileName || !mimeType) {
      throw new Error('Missing required options: fileBuffer, fileName, mimeType');
    }
    logger.info('Extracting text with Document AI (text only, no RAG)', { documentId, fileName });
    const extractionResult = await documentAiProcessor.extractTextOnly(
      documentId,
      userId,
      fileBuffer,
      fileName,
      mimeType
    );
    if (!extractionResult || !extractionResult.text) {
      throw new Error(`Document AI text extraction failed`);
    }
    logger.info('Text extraction completed', {
      documentId,
      textLength: extractionResult.text.length
    });
    return extractionResult.text;
  }

  /**
   * Run the deterministic financial table parser over the document text.
   *
   * @returns The parsed financials when at least one period has revenue,
   *   otherwise `null`. Parser failures are logged and also yield `null`.
   */
  private async runDeterministicFinancialParser(
    documentId: string,
    extractedText: string
  ): Promise<any> {
    try {
      const { parseFinancialsFromText } = await import('./financialTableParser');
      const parsedFinancials = parseFinancialsFromText(extractedText);
      // Check if parser found structured data
      const hasData =
        parsedFinancials.fy3?.revenue ||
        parsedFinancials.fy2?.revenue ||
        parsedFinancials.fy1?.revenue ||
        parsedFinancials.ltm?.revenue;
      if (hasData) {
        logger.info('Deterministic financial parser found structured data', {
          documentId,
          fy3: parsedFinancials.fy3,
          fy2: parsedFinancials.fy2,
          fy1: parsedFinancials.fy1,
          ltm: parsedFinancials.ltm
        });
        return parsedFinancials;
      }
      logger.info('Deterministic financial parser did not find structured data', { documentId });
      return null;
    } catch (parserError) {
      logger.warn('Deterministic financial parser failed', {
        documentId,
        error: parserError instanceof Error ? parserError.message : String(parserError)
      });
      return null;
    }
  }

  /**
   * Merge pass 2 results into a deep copy of pass 1. Only the paths listed in
   * `missingFields` are considered, and only when pass 2 supplies a real value.
   */
  private mergeResults(
    pass1: CIMReview,
    pass2: CIMReview,
    missingFields: string[]
  ): CIMReview {
    const merged = JSON.parse(JSON.stringify(pass1)) as CIMReview;
    for (const fieldPath of missingFields) {
      const value = this.getNestedValue(pass2, fieldPath);
      if (this.hasUsableValue(value)) {
        this.setNestedValue(merged, fieldPath, value);
      }
    }
    return merged;
  }

  /**
   * True when `value` is worth merging: a string that is neither blank nor the
   * "Not specified in CIM" placeholder, or any other truthy value.
   */
  private hasUsableValue(value: any): boolean {
    if (typeof value === 'string') {
      const trimmed = value.trim();
      return trimmed !== '' && trimmed !== SimpleDocumentProcessor.NOT_SPECIFIED;
    }
    return Boolean(value);
  }

  /**
   * Get nested value by path (e.g., "dealOverview.dealSource").
   * Only own properties are followed; returns `undefined` when any segment is missing.
   */
  private getNestedValue(obj: any, path: string): any {
    let current = obj;
    for (const key of path.split('.')) {
      const isContainer = current !== null && typeof current === 'object';
      if (!isContainer || !Object.prototype.hasOwnProperty.call(current, key)) {
        return undefined;
      }
      current = current[key];
    }
    return current;
  }

  /**
   * Set nested value by path, creating intermediate objects as needed.
   * Paths containing `__proto__`, `constructor` or `prototype` are refused,
   * because the paths originate from keys of LLM-produced JSON.
   */
  private setNestedValue(obj: any, path: string, value: any): void {
    if (obj === null || typeof obj !== 'object') {
      return;
    }
    const keys = path.split('.');
    if (keys.some(key => SimpleDocumentProcessor.UNSAFE_PATH_KEYS.has(key))) {
      logger.warn('Refusing to set value on unsafe path', { path });
      return;
    }
    let current = obj;
    for (let i = 0; i < keys.length - 1; i++) {
      const key = keys[i];
      const next = Object.prototype.hasOwnProperty.call(current, key) ? current[key] : undefined;
      // `typeof null === 'object'`, so null must be replaced explicitly.
      if (next === null || typeof next !== 'object') {
        current[key] = {};
      }
      current = current[key];
    }
    current[keys[keys.length - 1]] = value;
  }

  /**
   * Validate data and calculate completeness.
   *
   * Walks every nested plain object; string and null/undefined leaves are
   * counted as fields (other leaf types are not counted). BPCP internal fields
   * are skipped entirely and optional fields always count as filled.
   *
   * @returns Schema validity, the completeness percentage (0-100), field
   *   counts, the paths of empty required fields, and a list of issues.
   */
  private validateData(data: CIMReview): {
    isValid: boolean;
    completenessScore: number;
    totalFields: number;
    filledFields: number;
    emptyFields: string[];
    issues: string[];
  } {
    const emptyFields: string[] = [];
    const issues: string[] = [];
    let totalFields = 0;
    let filledFields = 0;

    const matchesAny = (fields: readonly string[], path: string): boolean =>
      fields.some(field => path === field || path.startsWith(field + '.'));
    const isBpcpInternalField = (path: string): boolean =>
      matchesAny(SimpleDocumentProcessor.BPCP_INTERNAL_FIELDS, path);
    const isOptionalField = (path: string): boolean =>
      matchesAny(SimpleDocumentProcessor.OPTIONAL_FIELDS, path);

    // Records an empty leaf: optional fields count as filled, others as gaps.
    const recordEmpty = (path: string): void => {
      totalFields++;
      if (isOptionalField(path)) {
        filledFields++;
      } else {
        emptyFields.push(path);
      }
    };

    const checkValue = (value: any, path: string = ''): void => {
      // Skip BPCP internal fields
      if (isBpcpInternalField(path)) {
        return;
      }
      if (value === null || value === undefined) {
        recordEmpty(path);
        return;
      }
      if (typeof value === 'string') {
        const trimmed = value.trim();
        if (trimmed === '' || trimmed === SimpleDocumentProcessor.NOT_SPECIFIED) {
          recordEmpty(path);
          return;
        }
        totalFields++;
        // Check minimum length (except for short fields like page count)
        if (!matchesAny(SimpleDocumentProcessor.SHORT_FIELDS, path) && trimmed.length < 10) {
          issues.push(`${path}: Too short (${trimmed.length} chars, min 10)`);
        }
        filledFields++;
      } else if (typeof value === 'object' && !Array.isArray(value)) {
        for (const key of Object.keys(value)) {
          checkValue(value[key], path ? `${path}.${key}` : key);
        }
      }
    };

    checkValue(data);

    const completenessScore = totalFields > 0 ? (filledFields / totalFields) * 100 : 0;

    // Validate schema
    const schemaValidation = cimReviewSchema.safeParse(data);
    const isValid = schemaValidation.success;
    if (!isValid) {
      issues.push(`Schema validation failed: ${schemaValidation.error?.errors.map(e => e.message).join(', ')}`);
    }

    return {
      isValid,
      completenessScore,
      totalFields,
      filledFields,
      emptyFields,
      issues
    };
  }

  /**
   * Parse a financial amount such as "$64.5M", "1,250K" or "(2.5M)" into a
   * plain number. K / M / B suffixes scale by 1e3 / 1e6 / 1e9. A leading "-"
   * or accounting-style parentheses around the value make it negative.
   *
   * @returns The numeric value, or `null` when no number can be parsed.
   */
  private parseFinancialAmount(value: string): number | null {
    // Remove currency symbols, commas and whitespace
    const compact = value.replace(/[$,\s]/g, '');
    // "(2.5M)" / "(2.5)M": the whole amount is parenthesized => negative.
    const accountingNegative = /^\([^()]+\)[a-z]*$/i.test(compact);
    let cleaned = compact.replace(/[()]/g, '');

    // Handle K, M, B suffixes
    let multiplier = 1;
    const suffix = cleaned.slice(-1).toLowerCase();
    if (suffix === 'k') {
      multiplier = 1000;
    } else if (suffix === 'm') {
      multiplier = 1000000;
    } else if (suffix === 'b') {
      multiplier = 1000000000;
    }
    if (multiplier !== 1) {
      cleaned = cleaned.slice(0, -1);
    }

    // Check for negative
    const explicitNegative = cleaned.startsWith('-');
    if (explicitNegative) {
      cleaned = cleaned.substring(1);
    }

    const num = parseFloat(cleaned);
    if (isNaN(num)) {
      return null;
    }
    return (explicitNegative || accountingNegative ? -1 : 1) * num * multiplier;
  }

  /**
   * Parse the first number in a percentage string such as "22.3%" or "-4%".
   * A fully parenthesized value such as "(5.0%)" is read as negative.
   *
   * @returns The percentage as a number, or `null` when none is found.
   */
  private parsePercentage(value: string): number | null {
    const match = value.match(/(-?\d+(?:\.\d+)?)/);
    if (!match) {
      return null;
    }
    const parsed = parseFloat(match[1]);
    const accountingNegative = /^\(\s*\d+(?:\.\d+)?\s*%?\s*\)\s*%?$/.test(value.trim());
    return accountingNegative ? -Math.abs(parsed) : parsed;
  }

  /**
   * Check whether an extracted revenue / EBITDA string is obviously wrong:
   * too short, matching a known-bad pattern, or too small for a target
   * company (revenue < $5M, |EBITDA| < $500K).
   */
  private isInvalidFinancialValue(value: string, fieldType: 'revenue' | 'ebitda' = 'revenue'): boolean {
    const trimmed = value.trim();
    // Reject very short values (likely extraction errors)
    if (trimmed.length < 3) {
      return true;
    }
    // Reject specific known wrong patterns
    if (SimpleDocumentProcessor.INVALID_AMOUNT_PATTERNS.some(pattern => pattern.test(trimmed))) {
      return true;
    }
    // Additional check: reject values that are too small for target companies
    const numericValue = this.parseFinancialAmount(trimmed);
    if (numericValue === null) {
      return false;
    }
    if (fieldType === 'revenue') {
      // Revenue should be at least $5M (this also rejects negative revenue)
      return numericValue < 5000000;
    }
    // EBITDA may be negative, but its magnitude should be at least $500K
    return Math.abs(numericValue) < 500000;
  }

  /**
   * Validate and fix financial data - reject obviously wrong values.
   *
   * For each period (fy3, fy2, fy1, ltm) this mutates `data` in place:
   * - revenue / EBITDA that fail `isInvalidFinancialValue` become "Not specified in CIM";
   * - revenue below 20% of the other periods' average is rejected; other
   *   cross-period anomalies are only logged;
   * - an EBITDA margin that differs from EBITDA / revenue by more than 15pp is
   *   replaced by the calculated margin when that margin is plausible
   *   (-10% to 60%); implausible margins are corrected or rejected;
   * - revenue growth outside -50%..500% is rejected.
   *
   * @returns The same `data` object that was passed in.
   */
  private validateAndFixFinancialData(data: CIMReview): CIMReview {
    if (!data.financialSummary?.financials) {
      return data;
    }

    type Period = 'fy3' | 'fy2' | 'fy1' | 'ltm';
    const notSpecified = SimpleDocumentProcessor.NOT_SPECIFIED;
    const financials = data.financialSummary.financials;
    type PeriodData = NonNullable<(typeof financials)[Period]>;
    // Chronological order; also used to find the preceding period.
    const periods: Period[] = ['fy3', 'fy2', 'fy1', 'ltm'];

    const amountOf = (value: string | null | undefined): number | null =>
      this.parseFinancialAmount(value || '');
    const inMillions = (value: number): string => (value / 1000000).toFixed(1);
    const isPlausibleMargin = (margin: number): boolean => margin >= -10 && margin <= 60;

    // Validate revenue - should be reasonable (at least $5M for target companies)
    const rejectInvalidRevenue = (period: Period, periodData: PeriodData): void => {
      if (!periodData.revenue || periodData.revenue === notSpecified) return;
      if (!this.isInvalidFinancialValue(periodData.revenue, 'revenue')) return;
      logger.warn('Rejecting invalid revenue value', {
        period,
        value: periodData.revenue,
        reason: 'Value is clearly wrong (too small or invalid pattern)'
      });
      periodData.revenue = notSpecified;
    };

    // Cross-validate: check consistency across periods and detect misaligned columns
    const crossCheckRevenue = (period: Period, periodData: PeriodData): void => {
      if (!periodData.revenue || periodData.revenue === notSpecified) return;
      const currentValue = amountOf(periodData.revenue);
      if (currentValue === null || currentValue <= 0) return;

      const otherValues = periods
        .filter(p => p !== period)
        .map(p => amountOf(financials[p]?.revenue))
        .filter((v): v is number => v !== null && v > 0);
      if (otherValues.length === 0) return;

      const avgOtherValue = otherValues.reduce((sum, v) => sum + v, 0) / otherValues.length;
      const maxOtherValue = Math.max(...otherValues);
      const minOtherValue = Math.min(...otherValues);

      // Check 1: Value is too small compared to other periods (likely wrong column)
      if (currentValue < avgOtherValue * 0.2) {
        logger.warn('Rejecting revenue value - inconsistent with other periods', {
          period,
          value: periodData.revenue,
          numericValue: currentValue,
          avgOtherPeriods: avgOtherValue,
          maxOtherPeriods: maxOtherValue,
          minOtherPeriods: minOtherValue,
          reason: `Value ($${inMillions(currentValue)}M) is <20% of average ($${inMillions(avgOtherValue)}M) - likely wrong column or misaligned extraction`
        });
        periodData.revenue = notSpecified;
        // The value is gone; the remaining checks would only log noise about it.
        return;
      }

      // Check 2: Revenue should generally increase or be stable.
      // Exception: if this is FY-3 and others are higher, that's normal.
      if (period !== 'fy3' && currentValue < minOtherValue * 0.5 && currentValue < avgOtherValue * 0.6) {
        // Don't reject automatically, but flag for review
        logger.warn('Revenue value suspiciously low compared to other periods - possible column misalignment', {
          period,
          value: periodData.revenue,
          numericValue: currentValue,
          avgOtherPeriods: avgOtherValue,
          minOtherPeriods: minOtherValue,
          reason: `Revenue for ${period} ($${inMillions(currentValue)}M) is <50% of minimum other period ($${inMillions(minOtherValue)}M) - may indicate column misalignment`
        });
      }

      // Check 3: Detect unusual growth versus the preceding period
      const currentIndex = periods.indexOf(period);
      if (currentIndex <= 0) return;
      const prevPeriod = periods[currentIndex - 1];
      const prevValue = amountOf(financials[prevPeriod]?.revenue);
      if (prevValue === null || prevValue <= 0) return;
      const growth = ((currentValue - prevValue) / prevValue) * 100;
      // Flag if growth is >200% or < -50% (unusual for year-over-year)
      if (growth > 200 || growth < -50) {
        // Don't reject - just log as warning, as this might be legitimate
        logger.warn('Detected unusual revenue growth pattern - may indicate misaligned columns', {
          period,
          prevPeriod,
          currentValue: currentValue,
          prevValue: prevValue,
          growth: `${growth.toFixed(1)}%`,
          reason: `Unusual growth (${growth > 0 ? '+' : ''}${growth.toFixed(1)}%) between ${prevPeriod} and ${period} - may indicate column misalignment`
        });
      }
    };

    // Validate EBITDA - should be reasonable
    // NOTE(review): "-25M" / "$-25M" are rejected by the known-bad patterns even
    // though negative EBITDA is otherwise accepted - confirm this is intended.
    const rejectInvalidEbitda = (period: Period, periodData: PeriodData): void => {
      if (!periodData.ebitda || periodData.ebitda === notSpecified) return;
      if (!this.isInvalidFinancialValue(periodData.ebitda, 'ebitda')) return;
      logger.warn('Rejecting invalid EBITDA value', {
        period,
        value: periodData.ebitda,
        reason: 'Value is clearly wrong (too small or invalid pattern)'
      });
      periodData.ebitda = notSpecified;
    };

    // Validate margins - should be reasonable percentages and consistent across periods
    const checkEbitdaMargin = (period: Period, periodData: PeriodData): void => {
      if (!periodData.ebitdaMargin || periodData.ebitdaMargin === notSpecified) return;
      const marginStr = periodData.ebitdaMargin.trim();
      const marginValue = this.parsePercentage(marginStr);
      if (marginValue === null) return;

      // First, try to calculate margin from revenue and EBITDA to validate
      const revValue = amountOf(periodData.revenue);
      const ebitdaValue = amountOf(periodData.ebitda);

      if (revValue !== null && ebitdaValue !== null && revValue > 0) {
        const calculatedMargin = (ebitdaValue / revValue) * 100;
        const calculatedLabel = `${calculatedMargin.toFixed(1)}%`;
        const marginDiff = Math.abs(calculatedMargin - marginValue);

        if (marginDiff > 15) {
          // This catches cases like 95% when it should be 22%.
          if (isPlausibleMargin(calculatedMargin)) {
            logger.warn('EBITDA margin mismatch detected - auto-correcting', {
              period,
              statedMargin: `${marginValue}%`,
              calculatedMargin: calculatedLabel,
              difference: `${marginDiff.toFixed(1)}pp`,
              revenue: periodData.revenue,
              ebitda: periodData.ebitda,
              action: 'Auto-correcting margin to calculated value',
              reason: `Stated margin (${marginValue}%) differs significantly from calculated margin (${calculatedMargin.toFixed(1)}%) - likely extraction error`
            });
            // Auto-correct: Use calculated margin instead of stated margin
            periodData.ebitdaMargin = calculatedLabel;
          } else if (isPlausibleMargin(marginValue)) {
            // The calculated margin is itself implausible, so revenue or EBITDA
            // is the more likely extraction error; keep the stated margin.
            logger.warn('EBITDA margin mismatch detected - calculated margin implausible, keeping stated margin', {
              period,
              statedMargin: `${marginValue}%`,
              calculatedMargin: calculatedLabel,
              difference: `${marginDiff.toFixed(1)}pp`,
              revenue: periodData.revenue,
              ebitda: periodData.ebitda,
              reason: `Calculated margin (${calculatedMargin.toFixed(1)}%) outside reasonable range (-10% to 60%) - revenue or EBITDA may be misextracted`
            });
          } else {
            logger.warn('Rejecting invalid EBITDA margin', {
              period,
              value: marginStr,
              numericValue: marginValue,
              reason: `Margin (${marginValue}%) outside reasonable range (-10% to 60%)`
            });
            periodData.ebitdaMargin = notSpecified;
          }
        } else {
          if (marginDiff > 10) {
            // If difference is 10-15pp, log warning but don't auto-correct (might be legitimate)
            logger.warn('EBITDA margin mismatch detected', {
              period,
              statedMargin: `${marginValue}%`,
              calculatedMargin: calculatedLabel,
              difference: `${marginDiff.toFixed(1)}pp`,
              revenue: periodData.revenue,
              ebitda: periodData.ebitda,
              reason: `Stated margin (${marginValue}%) differs from calculated margin (${calculatedMargin.toFixed(1)}%) - may indicate data extraction error`
            });
          }
          // Stated margin is close to the calculated one; still enforce the
          // reasonable range (-10% to 60%).
          if (!isPlausibleMargin(marginValue)) {
            logger.warn('EBITDA margin outside reasonable range - using calculated value', {
              period,
              value: marginStr,
              numericValue: marginValue,
              calculatedMargin: calculatedLabel,
              reason: `Stated margin (${marginValue}%) outside reasonable range (-10% to 60%), but calculated margin (${calculatedMargin.toFixed(1)}%) is valid - using calculated`
            });
            periodData.ebitdaMargin = isPlausibleMargin(calculatedMargin) ? calculatedLabel : notSpecified;
          }
        }
      } else if (!isPlausibleMargin(marginValue)) {
        // Can't calculate margin, so just check if stated margin is in reasonable range
        logger.warn('Rejecting invalid EBITDA margin', {
          period,
          value: marginStr,
          numericValue: marginValue,
          reason: `Margin (${marginValue}%) outside reasonable range (-10% to 60%)`
        });
        periodData.ebitdaMargin = notSpecified;
      }

      // Check margin consistency across periods (margins should be relatively stable)
      if (!periodData.ebitdaMargin || periodData.ebitdaMargin === notSpecified) return;
      // Re-extract margin value after potential auto-correction
      const finalMarginValue = this.parsePercentage(periodData.ebitdaMargin) ?? marginValue;
      const otherMargins = periods
        .filter(p => p !== period)
        .map(p => {
          const margin = financials[p]?.ebitdaMargin;
          return !margin || margin === notSpecified ? null : this.parsePercentage(margin);
        })
        .filter((v): v is number => v !== null);
      if (otherMargins.length === 0) return;
      const avgOtherMargin = otherMargins.reduce((sum, v) => sum + v, 0) / otherMargins.length;
      const crossPeriodDiff = Math.abs(finalMarginValue - avgOtherMargin);
      // Flag if margin differs by > 20 percentage points from average
      if (crossPeriodDiff > 20) {
        // Don't reject - just log as warning
        logger.warn('EBITDA margin inconsistency across periods', {
          period,
          margin: `${finalMarginValue}%`,
          avgOtherPeriods: `${avgOtherMargin.toFixed(1)}%`,
          difference: `${crossPeriodDiff.toFixed(1)}pp`,
          reason: `Margin for ${period} (${finalMarginValue}%) differs significantly from average of other periods (${avgOtherMargin.toFixed(1)}%) - may indicate extraction error`
        });
      }
    };

    // Validate revenue growth - should be reasonable percentage
    const checkRevenueGrowth = (period: Period, periodData: PeriodData): void => {
      const growth = periodData.revenueGrowth;
      if (!growth || growth === notSpecified || growth === 'N/A') return;
      const growthStr = growth.trim();
      const growthValue = this.parsePercentage(growthStr);
      if (growthValue === null) return;
      // Reject growth rates outside reasonable range (-50% to 500%)
      if (growthValue < -50 || growthValue > 500) {
        logger.warn('Rejecting invalid revenue growth', {
          period,
          value: growthStr,
          numericValue: growthValue,
          reason: 'Growth rate outside reasonable range'
        });
        periodData.revenueGrowth = notSpecified;
      }
    };

    for (const period of periods) {
      const periodData = financials[period];
      if (!periodData) continue;
      rejectInvalidRevenue(period, periodData);
      crossCheckRevenue(period, periodData);
      rejectInvalidEbitda(period, periodData);
      checkEbitdaMargin(period, periodData);
      checkRevenueGrowth(period, periodData);
    }

    return data;
  }

  /**
   * Generate summary from analysis data: a " | "-separated line of whichever
   * of target, industry, location, LTM revenue and LTM EBITDA are present.
   * Falls back to "CIM analysis completed" when none are.
   */
  private generateSummary(data: CIMReview): string {
    const parts: string[] = [];
    if (data.dealOverview?.targetCompanyName) {
      parts.push(`Target: ${data.dealOverview.targetCompanyName}`);
    }
    if (data.dealOverview?.industrySector) {
      parts.push(`Industry: ${data.dealOverview.industrySector}`);
    }
    if (data.dealOverview?.geography) {
      parts.push(`Location: ${data.dealOverview.geography}`);
    }
    if (data.financialSummary?.financials?.ltm?.revenue) {
      parts.push(`LTM Revenue: ${data.financialSummary.financials.ltm.revenue}`);
    }
    if (data.financialSummary?.financials?.ltm?.ebitda) {
      parts.push(`LTM EBITDA: ${data.financialSummary.financials.ltm.ebitda}`);
    }
    return parts.join(' | ') || 'CIM analysis completed';
  }
}
// Module-level instance of SimpleDocumentProcessor, created once when this module is loaded.
export const simpleDocumentProcessor = new SimpleDocumentProcessor();