Files
cim_summary/backend/debug-llm-service.js
Jon 57770fd99d feat: Implement hybrid LLM approach with enhanced prompts for CIM analysis
🎯 Major Features:
- Hybrid LLM configuration: Claude 3.7 Sonnet (primary) + GPT-4.5 (fallback)
- Task-specific model selection for optimal performance
- Enhanced prompts for all analysis types with proven results

🔧 Technical Improvements:
- Enhanced financial analysis with fiscal year mapping (100% success rate)
- Business model analysis with scalability assessment
- Market positioning analysis with TAM/SAM extraction
- Management team assessment with succession planning
- Creative content generation with GPT-4.5

📊 Performance & Cost Optimization:
- Claude 3.7 Sonnet: $3/$15 per 1M tokens (82.2% MATH score)
- GPT-4.5: Premium creative content ($75/$150 per 1M tokens)
- ~80% cost savings using Claude for analytical tasks
- Automatic fallback system for reliability

 Proven Results:
- Successfully extracted 3-year financial data from STAX CIM
- Correctly mapped fiscal years (2023→FY-3, 2024→FY-2, 2025E→FY-1, LTM Mar-25→LTM)
- Identified revenue: 4M→1M→1M→6M (LTM)
- Identified EBITDA: 8.9M→3.9M→1M→7.2M (LTM)

🚀 Files Added/Modified:
- Enhanced LLM service with task-specific model selection
- Updated environment configuration for hybrid approach
- Enhanced prompt builders for all analysis types
- Comprehensive testing scripts and documentation
- Updated frontend components for improved UX

📚 References:
- Eden AI Model Comparison: Claude 3.7 Sonnet vs GPT-4.5
- Artificial Analysis Benchmarks for performance metrics
- Cost optimization based on model strengths and pricing
2025-07-28 16:46:06 -04:00

220 lines
9.8 KiB
JavaScript

const { OpenAI } = require('openai');
require('dotenv').config();
// OpenAI client used for the chat-completion request in this script; the key
// is taken from the OPENAI_API_KEY environment variable.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });
/**
 * Pulls a JSON value out of a raw LLM reply.
 *
 * Candidate snippets are collected in priority order and the first one that
 * parses is returned:
 *   1. the body of a ```json fenced block,
 *   2. the body of any other ``` fenced block,
 *   3. the text between the first `{` and the last `}`.
 *
 * Fences are matched leniently (any whitespace or CRLF around the body,
 * case-insensitive `json` tag), and a fenced block that fails to parse no
 * longer prevents the later strategies from being tried.
 *
 * @param {string} content - Raw text returned by the model.
 * @returns {*} The parsed JSON value (normally an object).
 * @throws {Error} Message starts with "JSON extraction failed:" when content
 *   is not a string, holds no candidate, or no candidate parses. The
 *   underlying error is attached as `cause` on runtimes that support it.
 */
function extractJsonFromResponse(content) {
  try {
    if (typeof content !== 'string') {
      const received = content === null ? 'null' : typeof content;
      throw new TypeError(`Expected string content but received ${received}`);
    }

    console.log('🔍 Extracting JSON from content...');
    console.log('📄 Content preview:', content.substring(0, 200) + '...');

    const candidates = [];
    // Skip empty captures and snippets already queued by an earlier strategy.
    const addCandidate = (foundMessage, text) => {
      if (text && !candidates.some((candidate) => candidate.text === text)) {
        candidates.push({ foundMessage, text });
      }
    };

    // First, try to find JSON within ```json ... ```
    const jsonFence = content.match(/```json\b\s*([\s\S]*?)\s*```/i);
    addCandidate('✅ Found JSON in ```json block', jsonFence && jsonFence[1]);

    // Try to find JSON within ``` ... ``` (an optional language tag is skipped)
    const anyFence = content.match(/```(?:[\w-]+(?=\s))?\s*([\s\S]*?)\s*```/);
    addCandidate('✅ Found JSON in ``` block', anyFence && anyFence[1]);

    // Fall back to the span between the first and last curly braces
    const startIndex = content.indexOf('{');
    const endIndex = content.lastIndexOf('}');
    if (startIndex !== -1 && endIndex > startIndex) {
      addCandidate(
        '✅ Found JSON using brace matching',
        content.substring(startIndex, endIndex + 1)
      );
    }

    if (candidates.length === 0) {
      throw new Error('No JSON object found in response');
    }

    // Report the error from the highest-priority candidate if none parse.
    let firstParseError = null;
    for (const { foundMessage, text } of candidates) {
      try {
        const parsed = JSON.parse(text);
        console.log(foundMessage);
        console.log('✅ JSON parsed successfully');
        return parsed;
      } catch (parseError) {
        firstParseError = firstParseError || parseError;
      }
    }
    throw firstParseError;
  } catch (error) {
    console.error('❌ JSON extraction failed:', error.message);
    console.error('📄 Full content:', content);
    throw new Error(
      `JSON extraction failed: ${error instanceof Error ? error.message : 'Unknown error'}`,
      { cause: error }
    );
  }
}
/**
 * Debug harness: sends a fixed sample CIM prompt to OpenAI (gpt-4o), prints
 * the raw reply, extracts the JSON payload with extractJsonFromResponse and
 * reports which of the expected top-level sections are present.
 *
 * Errors are caught and logged rather than rethrown, so the returned promise
 * always resolves. When the request or extraction fails, or a required
 * section is missing, `process.exitCode` is set to 1 so a shell or CI caller
 * can detect the failure.
 *
 * @returns {Promise<void>}
 */
async function testLLMService() {
  // Top-level sections the reply is expected to contain.
  const requiredFields = [
    'dealOverview', 'businessDescription', 'marketIndustryAnalysis',
    'financialSummary', 'managementTeamOverview', 'preliminaryInvestmentThesis',
    'keyQuestionsNextSteps'
  ];

  try {
    console.log('🤖 Testing LLM service logic...');
    // Simulate the exact prompt from the service
    const systemPrompt = `You are a financial analyst tasked with analyzing CIM (Confidential Information Memorandum) documents. You must respond with ONLY a valid JSON object that follows the exact structure provided. Do not include any other text, explanations, or markdown formatting.`;
    // NOTE: the template below is runtime text; its lines are intentionally
    // not indented so the prompt sent to the model stays unchanged.
    const prompt = `Please analyze the following CIM document and generate a JSON object based on the provided structure.
CIM Document Text:
This is a test CIM document for STAX, a technology company focused on digital transformation solutions. The company operates in the software-as-a-service sector with headquarters in San Francisco, CA. STAX provides cloud-based enterprise software solutions to Fortune 500 companies.
Your response MUST be a single, valid JSON object that follows this exact structure. Do not include any other text.
JSON Structure to Follow:
\`\`\`json
{
"dealOverview": {
"targetCompanyName": "Target Company Name",
"industrySector": "Industry/Sector",
"geography": "Geography (HQ & Key Operations)",
"dealSource": "Deal Source",
"transactionType": "Transaction Type",
"dateCIMReceived": "Date CIM Received",
"dateReviewed": "Date Reviewed",
"reviewers": "Reviewer(s)",
"cimPageCount": "CIM Page Count",
"statedReasonForSale": "Stated Reason for Sale (if provided)"
},
"businessDescription": {
"coreOperationsSummary": "Core Operations Summary (3-5 sentences)",
"keyProductsServices": "Key Products/Services & Revenue Mix (Est. % if available)",
"uniqueValueProposition": "Unique Value Proposition (UVP) / Why Customers Buy",
"customerBaseOverview": {
"keyCustomerSegments": "Key Customer Segments/Types",
"customerConcentrationRisk": "Customer Concentration Risk (Top 5 and/or Top 10 Customers as % Revenue - if stated/inferable)",
"typicalContractLength": "Typical Contract Length / Recurring Revenue % (if applicable)"
},
"keySupplierOverview": {
"dependenceConcentrationRisk": "Dependence/Concentration Risk"
}
},
"marketIndustryAnalysis": {
"estimatedMarketSize": "Estimated Market Size (TAM/SAM - if provided)",
"estimatedMarketGrowthRate": "Estimated Market Growth Rate (% CAGR - Historical & Projected)",
"keyIndustryTrends": "Key Industry Trends & Drivers (Tailwinds/Headwinds)",
"competitiveLandscape": {
"keyCompetitors": "Key Competitors Identified",
"targetMarketPosition": "Target's Stated Market Position/Rank",
"basisOfCompetition": "Basis of Competition"
},
"barriersToEntry": "Barriers to Entry / Competitive Moat (Stated/Inferred)"
},
"financialSummary": {
"financials": {
"fy3": {
"revenue": "Revenue amount for FY-3",
"revenueGrowth": "N/A (baseline year)",
"grossProfit": "Gross profit amount for FY-3",
"grossMargin": "Gross margin % for FY-3",
"ebitda": "EBITDA amount for FY-3",
"ebitdaMargin": "EBITDA margin % for FY-3"
},
"fy2": {
"revenue": "Revenue amount for FY-2",
"revenueGrowth": "Revenue growth % for FY-2",
"grossProfit": "Gross profit amount for FY-2",
"grossMargin": "Gross margin % for FY-2",
"ebitda": "EBITDA amount for FY-2",
"ebitdaMargin": "EBITDA margin % for FY-2"
},
"fy1": {
"revenue": "Revenue amount for FY-1",
"revenueGrowth": "Revenue growth % for FY-1",
"grossProfit": "Gross profit amount for FY-1",
"grossMargin": "Gross margin % for FY-1",
"ebitda": "EBITDA amount for FY-1",
"ebitdaMargin": "EBITDA margin % for FY-1"
},
"ltm": {
"revenue": "Revenue amount for LTM",
"revenueGrowth": "Revenue growth % for LTM",
"grossProfit": "Gross profit amount for LTM",
"grossMargin": "Gross margin % for LTM",
"ebitda": "EBITDA amount for LTM",
"ebitdaMargin": "EBITDA margin % for LTM"
}
},
"qualityOfEarnings": "Quality of earnings/adjustments impression",
"revenueGrowthDrivers": "Revenue growth drivers (stated)",
"marginStabilityAnalysis": "Margin stability/trend analysis",
"capitalExpenditures": "Capital expenditures (LTM % of revenue)",
"workingCapitalIntensity": "Working capital intensity impression",
"freeCashFlowQuality": "Free cash flow quality impression"
},
"managementTeamOverview": {
"keyLeaders": "Key Leaders Identified (CEO, CFO, COO, Head of Sales, etc.)",
"managementQualityAssessment": "Initial Assessment of Quality/Experience (Based on Bios)",
"postTransactionIntentions": "Management's Stated Post-Transaction Role/Intentions (if mentioned)",
"organizationalStructure": "Organizational Structure Overview (Impression)"
},
"preliminaryInvestmentThesis": {
"keyAttractions": "Key Attractions / Strengths (Why Invest?)",
"potentialRisks": "Potential Risks / Concerns (Why Not Invest?)",
"valueCreationLevers": "Initial Value Creation Levers (How PE Adds Value)",
"alignmentWithFundStrategy": "Alignment with Fund Strategy (BPCP is focused on companies in 5+MM EBITDA range in consumer and industrial end markets. M&A, increased technology & data usage, supply chain and human capital optimization are key value-levers. Also a preference companies which are founder / family-owned and within driving distance of Cleveland and Charlotte.)"
},
"keyQuestionsNextSteps": {
"criticalQuestions": "Critical Questions Arising from CIM Review",
"missingInformation": "Key Missing Information / Areas for Diligence Focus",
"preliminaryRecommendation": "Preliminary Recommendation",
"rationaleForRecommendation": "Rationale for Recommendation (Brief)",
"proposedNextSteps": "Proposed Next Steps"
}
}
\`\`\`
IMPORTANT: Replace all placeholder text with actual information from the CIM document. If information is not available, use "Not specified in CIM". Ensure all financial metrics are properly formatted as strings.`;

    const messages = [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: prompt },
    ];

    console.log('📤 Sending request to OpenAI...');
    const response = await openai.chat.completions.create({
      model: 'gpt-4o',
      messages,
      max_tokens: 4000,
      temperature: 0.1,
    });
    console.log('📥 Received response from OpenAI');

    // Guard the response shape so an empty reply gives a clear message
    // instead of "Cannot read properties of undefined".
    const choice = response && Array.isArray(response.choices) ? response.choices[0] : undefined;
    const content = choice && choice.message ? choice.message.content : undefined;
    if (typeof content !== 'string' || content.length === 0) {
      throw new Error('OpenAI response did not contain any message content');
    }
    if (choice.finish_reason === 'length') {
      // The reply hit max_tokens, so the JSON is probably cut off mid-object.
      console.warn('⚠️ Response was truncated at max_tokens; JSON may be incomplete');
    }

    console.log('📄 Raw response content:');
    console.log(content);

    // Extract JSON
    const jsonOutput = extractJsonFromResponse(content);
    if (jsonOutput === null || typeof jsonOutput !== 'object') {
      throw new Error('Extracted JSON is not an object');
    }
    console.log('✅ JSON extraction successful');
    console.log('📊 Extracted JSON structure:');
    for (const field of requiredFields) {
      console.log(`- ${field}:`, jsonOutput[field] ? 'Present' : 'Missing');
    }

    // Test validation (simplified)
    const missingFields = requiredFields.filter((field) => !jsonOutput[field]);
    if (missingFields.length > 0) {
      console.log('❌ Missing required fields:', missingFields);
      process.exitCode = 1;
    } else {
      console.log('✅ All required fields present');
    }
  } catch (error) {
    console.error('❌ Error:', error instanceof Error ? error.message : error);
    // Signal failure to the caller without cutting off pending output.
    process.exitCode = 1;
  }
}
// Run the debug check as soon as the script is loaded. The returned promise is
// not awaited; testLLMService wraps its body in try/catch and logs failures itself.
testLLMService();